1be1d678aSKris Buschelman #define PETSCMAT_DLL 2be1d678aSKris Buschelman 32593348eSBarry Smith /* 4b6490206SBarry Smith Defines the basic matrix operations for the BAIJ (compressed row) 52593348eSBarry Smith matrix storage format. 62593348eSBarry Smith */ 77c4f633dSBarry Smith #include "../src/mat/impls/baij/seq/baij.h" 8325e03aeSBarry Smith #include "petscsys.h" /*I "petscmat.h" I*/ 93b547af2SSatish Balay 10c60f0209SBarry Smith #include "../src/mat/blockinvert.h" 11b01c7715SBarry Smith 12b01c7715SBarry Smith #undef __FUNCT__ 1343516a2dSKris Buschelman #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal" 14bc08b0f1SBarry Smith /*@ 1543516a2dSKris Buschelman MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries. 1643516a2dSKris Buschelman 1743516a2dSKris Buschelman Collective on Mat 1843516a2dSKris Buschelman 1943516a2dSKris Buschelman Input Parameters: 2043516a2dSKris Buschelman . mat - the matrix 2143516a2dSKris Buschelman 2243516a2dSKris Buschelman Level: advanced 2343516a2dSKris Buschelman @*/ 2446129b97SKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat) 2543516a2dSKris Buschelman { 2643516a2dSKris Buschelman PetscErrorCode ierr,(*f)(Mat); 2743516a2dSKris Buschelman 2843516a2dSKris Buschelman PetscFunctionBegin; 2943516a2dSKris Buschelman PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 3043516a2dSKris Buschelman if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 3143516a2dSKris Buschelman if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 3243516a2dSKris Buschelman 3343516a2dSKris Buschelman ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr); 3443516a2dSKris Buschelman if (f) { 3543516a2dSKris Buschelman ierr = (*f)(mat);CHKERRQ(ierr); 3643516a2dSKris Buschelman } else { 3743516a2dSKris Buschelman SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ."); 3843516a2dSKris Buschelman } 
3943516a2dSKris Buschelman PetscFunctionReturn(0); 4043516a2dSKris Buschelman } 4143516a2dSKris Buschelman 4243516a2dSKris Buschelman EXTERN_C_BEGIN 4343516a2dSKris Buschelman #undef __FUNCT__ 44b01c7715SBarry Smith #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 4546129b97SKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A) 46b01c7715SBarry Smith { 47b01c7715SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 486849ba73SBarry Smith PetscErrorCode ierr; 49d0f46423SBarry Smith PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs; 5097e5c40aSBarry Smith MatScalar *v = a->a,*odiag,*diag,*mdiag; 5162bba022SBarry Smith PetscReal shift = 0.0; 52b01c7715SBarry Smith 53b01c7715SBarry Smith PetscFunctionBegin; 54b01c7715SBarry Smith if (a->idiagvalid) PetscFunctionReturn(0); 55b01c7715SBarry Smith ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 56b01c7715SBarry Smith diag_offset = a->diag; 57b01c7715SBarry Smith if (!a->idiag) { 58b01c7715SBarry Smith ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr); 59b01c7715SBarry Smith } 60b01c7715SBarry Smith diag = a->idiag; 61b01c7715SBarry Smith mdiag = a->idiag+bs*bs*mbs; 62b01c7715SBarry Smith /* factor and invert each block */ 63521d7252SBarry Smith switch (bs){ 64ab040260SJed Brown case 1: 65ab040260SJed Brown for (i=0; i<mbs; i++) { 66ab040260SJed Brown odiag = v + 1*diag_offset[i]; 67ab040260SJed Brown diag[0] = odiag[0]; 68ab040260SJed Brown mdiag[0] = odiag[0]; 69ab040260SJed Brown diag[0] = 1.0 / (diag[0] + shift); 70ab040260SJed Brown diag += 1; 71ab040260SJed Brown mdiag += 1; 72ab040260SJed Brown } 73ab040260SJed Brown break; 74b01c7715SBarry Smith case 2: 75b01c7715SBarry Smith for (i=0; i<mbs; i++) { 76b01c7715SBarry Smith odiag = v + 4*diag_offset[i]; 77b01c7715SBarry Smith diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 78b01c7715SBarry Smith mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = 
odiag[3]; 7962bba022SBarry Smith ierr = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 80b01c7715SBarry Smith diag += 4; 81b01c7715SBarry Smith mdiag += 4; 82b01c7715SBarry Smith } 83b01c7715SBarry Smith break; 84b01c7715SBarry Smith case 3: 85b01c7715SBarry Smith for (i=0; i<mbs; i++) { 86b01c7715SBarry Smith odiag = v + 9*diag_offset[i]; 87b01c7715SBarry Smith diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 88b01c7715SBarry Smith diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 89b01c7715SBarry Smith diag[8] = odiag[8]; 90b01c7715SBarry Smith mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 91b01c7715SBarry Smith mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 92b01c7715SBarry Smith mdiag[8] = odiag[8]; 9362bba022SBarry Smith ierr = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr); 94b01c7715SBarry Smith diag += 9; 95b01c7715SBarry Smith mdiag += 9; 96b01c7715SBarry Smith } 97b01c7715SBarry Smith break; 98b01c7715SBarry Smith case 4: 99b01c7715SBarry Smith for (i=0; i<mbs; i++) { 100b01c7715SBarry Smith odiag = v + 16*diag_offset[i]; 101b01c7715SBarry Smith ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 102b01c7715SBarry Smith ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 10362bba022SBarry Smith ierr = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr); 104b01c7715SBarry Smith diag += 16; 105b01c7715SBarry Smith mdiag += 16; 106b01c7715SBarry Smith } 107b01c7715SBarry Smith break; 108b01c7715SBarry Smith case 5: 109b01c7715SBarry Smith for (i=0; i<mbs; i++) { 110b01c7715SBarry Smith odiag = v + 25*diag_offset[i]; 111b01c7715SBarry Smith ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 112b01c7715SBarry Smith ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 11362bba022SBarry Smith ierr = 
Kernel_A_gets_inverse_A_5(diag,shift);CHKERRQ(ierr); 114b01c7715SBarry Smith diag += 25; 115b01c7715SBarry Smith mdiag += 25; 116b01c7715SBarry Smith } 117b01c7715SBarry Smith break; 118d49b2adcSBarry Smith case 6: 119d49b2adcSBarry Smith for (i=0; i<mbs; i++) { 120d49b2adcSBarry Smith odiag = v + 36*diag_offset[i]; 121d49b2adcSBarry Smith ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 122d49b2adcSBarry Smith ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 123d49b2adcSBarry Smith ierr = Kernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr); 124d49b2adcSBarry Smith diag += 36; 125d49b2adcSBarry Smith mdiag += 36; 126d49b2adcSBarry Smith } 127d49b2adcSBarry Smith break; 128b01c7715SBarry Smith default: 129521d7252SBarry Smith SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs); 130b01c7715SBarry Smith } 131b01c7715SBarry Smith a->idiagvalid = PETSC_TRUE; 132b01c7715SBarry Smith PetscFunctionReturn(0); 133b01c7715SBarry Smith } 13443516a2dSKris Buschelman EXTERN_C_END 135b01c7715SBarry Smith 136b01c7715SBarry Smith #undef __FUNCT__ 1376d3beeddSMatthew Knepley #define __FUNCT__ "MatPBRelax_SeqBAIJ_1" 1386d3beeddSMatthew Knepley PetscErrorCode MatPBRelax_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1396d3beeddSMatthew Knepley { 1406d3beeddSMatthew Knepley Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1416d3beeddSMatthew Knepley PetscScalar *x,x1,s1; 142dd6ea824SBarry Smith const PetscScalar *b; 143dd6ea824SBarry Smith const MatScalar *aa = a->a, *idiag,*mdiag,*v; 1446d3beeddSMatthew Knepley PetscErrorCode ierr; 1456d3beeddSMatthew Knepley PetscInt m = a->mbs,i,i2,nz,idx; 1466d3beeddSMatthew Knepley const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 1476d3beeddSMatthew Knepley 1486d3beeddSMatthew Knepley PetscFunctionBegin; 1496d3beeddSMatthew Knepley if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 1506d3beeddSMatthew Knepley its = 
its*lits; 1516d3beeddSMatthew Knepley if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1526d3beeddSMatthew Knepley if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 1536d3beeddSMatthew Knepley if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 1546d3beeddSMatthew Knepley if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 1556d3beeddSMatthew Knepley if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 1566d3beeddSMatthew Knepley 1576d3beeddSMatthew Knepley if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 1586d3beeddSMatthew Knepley 1596d3beeddSMatthew Knepley diag = a->diag; 1606d3beeddSMatthew Knepley idiag = a->idiag; 1616d3beeddSMatthew Knepley ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1626d3beeddSMatthew Knepley ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1636d3beeddSMatthew Knepley 1646d3beeddSMatthew Knepley if (flag & SOR_ZERO_INITIAL_GUESS) { 1656d3beeddSMatthew Knepley if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1666d3beeddSMatthew Knepley x[0] = b[0]*idiag[0]; 1676d3beeddSMatthew Knepley i2 = 1; 1686d3beeddSMatthew Knepley idiag += 1; 1696d3beeddSMatthew Knepley for (i=1; i<m; i++) { 1706d3beeddSMatthew Knepley v = aa + ai[i]; 1716d3beeddSMatthew Knepley vi = aj + ai[i]; 1726d3beeddSMatthew Knepley nz = diag[i] - ai[i]; 1736d3beeddSMatthew Knepley s1 = b[i2]; 1746d3beeddSMatthew Knepley while (nz--) { 1756d3beeddSMatthew Knepley idx = (*vi++); 1766d3beeddSMatthew Knepley x1 = x[idx]; 1776d3beeddSMatthew Knepley s1 -= v[0]*x1; 1786d3beeddSMatthew Knepley v += 1; 1796d3beeddSMatthew Knepley } 1806d3beeddSMatthew Knepley x[i2] = idiag[0]*s1; 1816d3beeddSMatthew Knepley idiag += 1; 1826d3beeddSMatthew Knepley i2 += 1; 
1836d3beeddSMatthew Knepley } 1846d3beeddSMatthew Knepley /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 1856d3beeddSMatthew Knepley ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 1866d3beeddSMatthew Knepley } 1876d3beeddSMatthew Knepley if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 1886d3beeddSMatthew Knepley (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 1896d3beeddSMatthew Knepley i2 = 0; 1906d3beeddSMatthew Knepley mdiag = a->idiag+a->mbs; 1916d3beeddSMatthew Knepley for (i=0; i<m; i++) { 1926d3beeddSMatthew Knepley x1 = x[i2]; 1936d3beeddSMatthew Knepley x[i2] = mdiag[0]*x1; 1946d3beeddSMatthew Knepley mdiag += 1; 1956d3beeddSMatthew Knepley i2 += 1; 1966d3beeddSMatthew Knepley } 1976d3beeddSMatthew Knepley ierr = PetscLogFlops(m);CHKERRQ(ierr); 1986d3beeddSMatthew Knepley } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 199d0f46423SBarry Smith ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 2006d3beeddSMatthew Knepley } 2016d3beeddSMatthew Knepley if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2026d3beeddSMatthew Knepley idiag = a->idiag+a->mbs - 1; 2036d3beeddSMatthew Knepley i2 = m - 1; 2046d3beeddSMatthew Knepley x1 = x[i2]; 2056d3beeddSMatthew Knepley x[i2] = idiag[0]*x1; 2066d3beeddSMatthew Knepley idiag -= 1; 2076d3beeddSMatthew Knepley i2 -= 1; 2086d3beeddSMatthew Knepley for (i=m-2; i>=0; i--) { 2096d3beeddSMatthew Knepley v = aa + (diag[i]+1); 2106d3beeddSMatthew Knepley vi = aj + diag[i] + 1; 2116d3beeddSMatthew Knepley nz = ai[i+1] - diag[i] - 1; 2126d3beeddSMatthew Knepley s1 = x[i2]; 2136d3beeddSMatthew Knepley while (nz--) { 2146d3beeddSMatthew Knepley idx = (*vi++); 2156d3beeddSMatthew Knepley x1 = x[idx]; 2166d3beeddSMatthew Knepley s1 -= v[0]*x1; 2176d3beeddSMatthew Knepley v += 1; 2186d3beeddSMatthew Knepley } 2196d3beeddSMatthew Knepley x[i2] = idiag[0]*s1; 220ab040260SJed Brown idiag -= 1; 
2216d3beeddSMatthew Knepley i2 -= 1; 2226d3beeddSMatthew Knepley } 2236d3beeddSMatthew Knepley ierr = PetscLogFlops(a->nz);CHKERRQ(ierr); 2246d3beeddSMatthew Knepley } 2256d3beeddSMatthew Knepley } else { 2266d3beeddSMatthew Knepley SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 2276d3beeddSMatthew Knepley } 2286d3beeddSMatthew Knepley ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2296d3beeddSMatthew Knepley ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 2306d3beeddSMatthew Knepley PetscFunctionReturn(0); 2316d3beeddSMatthew Knepley } 2326d3beeddSMatthew Knepley 2336d3beeddSMatthew Knepley #undef __FUNCT__ 234b01c7715SBarry Smith #define __FUNCT__ "MatPBRelax_SeqBAIJ_2" 235c1ac3661SBarry Smith PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 236b01c7715SBarry Smith { 237b01c7715SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 238b01c7715SBarry Smith PetscScalar *x,x1,x2,s1,s2; 239dd6ea824SBarry Smith const PetscScalar *b; 240dd6ea824SBarry Smith const MatScalar *v,*aa = a->a, *idiag,*mdiag; 241dfbe8321SBarry Smith PetscErrorCode ierr; 242c1ac3661SBarry Smith PetscInt m = a->mbs,i,i2,nz,idx; 243c1ac3661SBarry Smith const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 244b01c7715SBarry Smith 245b01c7715SBarry Smith PetscFunctionBegin; 24651f519a2SBarry Smith if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 247b01c7715SBarry Smith its = its*lits; 24877431f27SBarry Smith if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 249b01c7715SBarry Smith if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 250b01c7715SBarry Smith if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 25171f1c65dSBarry Smith if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) 
SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 252b01c7715SBarry Smith if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 253b01c7715SBarry Smith 254b01c7715SBarry Smith if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 255b01c7715SBarry Smith 256b01c7715SBarry Smith diag = a->diag; 257b01c7715SBarry Smith idiag = a->idiag; 2581ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2591ebc52fbSHong Zhang ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 260b01c7715SBarry Smith 261b01c7715SBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 262b01c7715SBarry Smith if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 263b01c7715SBarry Smith x[0] = b[0]*idiag[0] + b[1]*idiag[2]; 264b01c7715SBarry Smith x[1] = b[0]*idiag[1] + b[1]*idiag[3]; 265b01c7715SBarry Smith i2 = 2; 266b01c7715SBarry Smith idiag += 4; 267b01c7715SBarry Smith for (i=1; i<m; i++) { 268b01c7715SBarry Smith v = aa + 4*ai[i]; 269b01c7715SBarry Smith vi = aj + ai[i]; 270b01c7715SBarry Smith nz = diag[i] - ai[i]; 271b01c7715SBarry Smith s1 = b[i2]; s2 = b[i2+1]; 272b01c7715SBarry Smith while (nz--) { 273b01c7715SBarry Smith idx = 2*(*vi++); 274b01c7715SBarry Smith x1 = x[idx]; x2 = x[1+idx]; 275b01c7715SBarry Smith s1 -= v[0]*x1 + v[2]*x2; 276b01c7715SBarry Smith s2 -= v[1]*x1 + v[3]*x2; 277b01c7715SBarry Smith v += 4; 278b01c7715SBarry Smith } 279b01c7715SBarry Smith x[i2] = idiag[0]*s1 + idiag[2]*s2; 280b01c7715SBarry Smith x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 281b01c7715SBarry Smith idiag += 4; 282b01c7715SBarry Smith i2 += 2; 283b01c7715SBarry Smith } 284b01c7715SBarry Smith /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 285dc0b31edSSatish Balay ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 286b01c7715SBarry Smith } 287b01c7715SBarry Smith if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 288b01c7715SBarry 
Smith (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 289b01c7715SBarry Smith i2 = 0; 290b01c7715SBarry Smith mdiag = a->idiag+4*a->mbs; 291b01c7715SBarry Smith for (i=0; i<m; i++) { 292b01c7715SBarry Smith x1 = x[i2]; x2 = x[i2+1]; 293b01c7715SBarry Smith x[i2] = mdiag[0]*x1 + mdiag[2]*x2; 294b01c7715SBarry Smith x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2; 295b01c7715SBarry Smith mdiag += 4; 296b01c7715SBarry Smith i2 += 2; 297b01c7715SBarry Smith } 298dc0b31edSSatish Balay ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr); 299b01c7715SBarry Smith } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 300d0f46423SBarry Smith ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 301b01c7715SBarry Smith } 302b01c7715SBarry Smith if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 303b01c7715SBarry Smith idiag = a->idiag+4*a->mbs - 4; 304b01c7715SBarry Smith i2 = 2*m - 2; 305b01c7715SBarry Smith x1 = x[i2]; x2 = x[i2+1]; 306b01c7715SBarry Smith x[i2] = idiag[0]*x1 + idiag[2]*x2; 307b01c7715SBarry Smith x[i2+1] = idiag[1]*x1 + idiag[3]*x2; 308b01c7715SBarry Smith idiag -= 4; 309b01c7715SBarry Smith i2 -= 2; 310b01c7715SBarry Smith for (i=m-2; i>=0; i--) { 311b01c7715SBarry Smith v = aa + 4*(diag[i]+1); 312b01c7715SBarry Smith vi = aj + diag[i] + 1; 313b01c7715SBarry Smith nz = ai[i+1] - diag[i] - 1; 314b01c7715SBarry Smith s1 = x[i2]; s2 = x[i2+1]; 315b01c7715SBarry Smith while (nz--) { 316b01c7715SBarry Smith idx = 2*(*vi++); 317b01c7715SBarry Smith x1 = x[idx]; x2 = x[1+idx]; 318b01c7715SBarry Smith s1 -= v[0]*x1 + v[2]*x2; 319b01c7715SBarry Smith s2 -= v[1]*x1 + v[3]*x2; 320b01c7715SBarry Smith v += 4; 321b01c7715SBarry Smith } 322b01c7715SBarry Smith x[i2] = idiag[0]*s1 + idiag[2]*s2; 323b01c7715SBarry Smith x[i2+1] = idiag[1]*s1 + idiag[3]*s2; 324b01c7715SBarry Smith idiag -= 4; 325b01c7715SBarry Smith i2 -= 2; 326b01c7715SBarry Smith } 327dc0b31edSSatish Balay ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr); 
328b01c7715SBarry Smith } 329b01c7715SBarry Smith } else { 330634064b4SBarry Smith SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 331b01c7715SBarry Smith } 3321ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3331ebc52fbSHong Zhang ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 334b01c7715SBarry Smith PetscFunctionReturn(0); 335b01c7715SBarry Smith } 336b01c7715SBarry Smith 337b01c7715SBarry Smith #undef __FUNCT__ 338b01c7715SBarry Smith #define __FUNCT__ "MatPBRelax_SeqBAIJ_3" 339c1ac3661SBarry Smith PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 340b01c7715SBarry Smith { 341b01c7715SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 342b01c7715SBarry Smith PetscScalar *x,x1,x2,x3,s1,s2,s3; 343dd6ea824SBarry Smith const MatScalar *v,*aa = a->a, *idiag,*mdiag; 344dd6ea824SBarry Smith const PetscScalar *b; 345dfbe8321SBarry Smith PetscErrorCode ierr; 346c1ac3661SBarry Smith PetscInt m = a->mbs,i,i2,nz,idx; 347c1ac3661SBarry Smith const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 348b01c7715SBarry Smith 349b01c7715SBarry Smith PetscFunctionBegin; 350b01c7715SBarry Smith its = its*lits; 35171f1c65dSBarry Smith if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 35277431f27SBarry Smith if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 353b01c7715SBarry Smith if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 354b01c7715SBarry Smith if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 35571f1c65dSBarry Smith if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 356b01c7715SBarry Smith if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR 
iterations"); 357b01c7715SBarry Smith 358b01c7715SBarry Smith if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 359b01c7715SBarry Smith 360b01c7715SBarry Smith diag = a->diag; 361b01c7715SBarry Smith idiag = a->idiag; 3621ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3631ebc52fbSHong Zhang ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 364b01c7715SBarry Smith 365b01c7715SBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 366b01c7715SBarry Smith if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 367b01c7715SBarry Smith x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6]; 368b01c7715SBarry Smith x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7]; 369b01c7715SBarry Smith x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8]; 370b01c7715SBarry Smith i2 = 3; 371b01c7715SBarry Smith idiag += 9; 372b01c7715SBarry Smith for (i=1; i<m; i++) { 373b01c7715SBarry Smith v = aa + 9*ai[i]; 374b01c7715SBarry Smith vi = aj + ai[i]; 375b01c7715SBarry Smith nz = diag[i] - ai[i]; 376b01c7715SBarry Smith s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; 377b01c7715SBarry Smith while (nz--) { 378b01c7715SBarry Smith idx = 3*(*vi++); 379b01c7715SBarry Smith x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx]; 380b01c7715SBarry Smith s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 381b01c7715SBarry Smith s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 382b01c7715SBarry Smith s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 383b01c7715SBarry Smith v += 9; 384b01c7715SBarry Smith } 385b01c7715SBarry Smith x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 386b01c7715SBarry Smith x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 387b01c7715SBarry Smith x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 388b01c7715SBarry Smith idiag += 9; 389b01c7715SBarry Smith i2 += 3; 390b01c7715SBarry Smith } 391b01c7715SBarry Smith /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 392dc0b31edSSatish Balay ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 393b01c7715SBarry 
Smith } 394b01c7715SBarry Smith if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 395b01c7715SBarry Smith (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 396b01c7715SBarry Smith i2 = 0; 397b01c7715SBarry Smith mdiag = a->idiag+9*a->mbs; 398b01c7715SBarry Smith for (i=0; i<m; i++) { 399b01c7715SBarry Smith x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 400b01c7715SBarry Smith x[i2] = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3; 401b01c7715SBarry Smith x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3; 402b01c7715SBarry Smith x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; 403b01c7715SBarry Smith mdiag += 9; 404b01c7715SBarry Smith i2 += 3; 405b01c7715SBarry Smith } 406dc0b31edSSatish Balay ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr); 407b01c7715SBarry Smith } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 408d0f46423SBarry Smith ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 409b01c7715SBarry Smith } 410b01c7715SBarry Smith if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 411b01c7715SBarry Smith idiag = a->idiag+9*a->mbs - 9; 412b01c7715SBarry Smith i2 = 3*m - 3; 413b01c7715SBarry Smith x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; 414b01c7715SBarry Smith x[i2] = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3; 415b01c7715SBarry Smith x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3; 416b01c7715SBarry Smith x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3; 417b01c7715SBarry Smith idiag -= 9; 418b01c7715SBarry Smith i2 -= 3; 419b01c7715SBarry Smith for (i=m-2; i>=0; i--) { 420b01c7715SBarry Smith v = aa + 9*(diag[i]+1); 421b01c7715SBarry Smith vi = aj + diag[i] + 1; 422b01c7715SBarry Smith nz = ai[i+1] - diag[i] - 1; 423b01c7715SBarry Smith s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; 424b01c7715SBarry Smith while (nz--) { 425b01c7715SBarry Smith idx = 3*(*vi++); 426b01c7715SBarry Smith x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; 427b01c7715SBarry Smith s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 
428b01c7715SBarry Smith s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 429b01c7715SBarry Smith s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 430b01c7715SBarry Smith v += 9; 431b01c7715SBarry Smith } 432b01c7715SBarry Smith x[i2] = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3; 433b01c7715SBarry Smith x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3; 434b01c7715SBarry Smith x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; 435b01c7715SBarry Smith idiag -= 9; 436b01c7715SBarry Smith i2 -= 3; 437b01c7715SBarry Smith } 438dc0b31edSSatish Balay ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr); 439b01c7715SBarry Smith } 440b01c7715SBarry Smith } else { 441634064b4SBarry Smith SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 442b01c7715SBarry Smith } 4431ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 4441ebc52fbSHong Zhang ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 445b01c7715SBarry Smith PetscFunctionReturn(0); 446b01c7715SBarry Smith } 447b01c7715SBarry Smith 448b01c7715SBarry Smith #undef __FUNCT__ 449b01c7715SBarry Smith #define __FUNCT__ "MatPBRelax_SeqBAIJ_4" 450c1ac3661SBarry Smith PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 451b01c7715SBarry Smith { 452b01c7715SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 453b01c7715SBarry Smith PetscScalar *x,x1,x2,x3,x4,s1,s2,s3,s4; 454dd6ea824SBarry Smith const MatScalar *v,*aa = a->a, *idiag,*mdiag; 455dd6ea824SBarry Smith const PetscScalar *b; 456dfbe8321SBarry Smith PetscErrorCode ierr; 457c1ac3661SBarry Smith PetscInt m = a->mbs,i,i2,nz,idx; 458c1ac3661SBarry Smith const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 459b01c7715SBarry Smith 460b01c7715SBarry Smith PetscFunctionBegin; 46171f1c65dSBarry Smith if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 462b01c7715SBarry Smith its = its*lits; 46377431f27SBarry Smith if (its <= 0) 
SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 464b01c7715SBarry Smith if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 465b01c7715SBarry Smith if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 46671f1c65dSBarry Smith if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 467b01c7715SBarry Smith if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 468b01c7715SBarry Smith 469b01c7715SBarry Smith if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 470b01c7715SBarry Smith 471b01c7715SBarry Smith diag = a->diag; 472b01c7715SBarry Smith idiag = a->idiag; 4731ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 4741ebc52fbSHong Zhang ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 475b01c7715SBarry Smith 476b01c7715SBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 477b01c7715SBarry Smith if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 478b01c7715SBarry Smith x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8] + b[3]*idiag[12]; 479b01c7715SBarry Smith x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9] + b[3]*idiag[13]; 480b01c7715SBarry Smith x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14]; 481b01c7715SBarry Smith x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15]; 482b01c7715SBarry Smith i2 = 4; 483b01c7715SBarry Smith idiag += 16; 484b01c7715SBarry Smith for (i=1; i<m; i++) { 485b01c7715SBarry Smith v = aa + 16*ai[i]; 486b01c7715SBarry Smith vi = aj + ai[i]; 487b01c7715SBarry Smith nz = diag[i] - ai[i]; 488b01c7715SBarry Smith s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; 489b01c7715SBarry Smith while (nz--) { 490b01c7715SBarry Smith idx = 4*(*vi++); 491b01c7715SBarry Smith x1 = x[idx]; x2 = 
x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 492b01c7715SBarry Smith s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 493b01c7715SBarry Smith s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 494b01c7715SBarry Smith s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 495b01c7715SBarry Smith s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 496b01c7715SBarry Smith v += 16; 497b01c7715SBarry Smith } 498b01c7715SBarry Smith x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 499b01c7715SBarry Smith x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 500b01c7715SBarry Smith x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 501b01c7715SBarry Smith x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4; 502b01c7715SBarry Smith idiag += 16; 503b01c7715SBarry Smith i2 += 4; 504b01c7715SBarry Smith } 505b01c7715SBarry Smith /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 506dc0b31edSSatish Balay ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 507b01c7715SBarry Smith } 508b01c7715SBarry Smith if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 509b01c7715SBarry Smith (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 510b01c7715SBarry Smith i2 = 0; 511b01c7715SBarry Smith mdiag = a->idiag+16*a->mbs; 512b01c7715SBarry Smith for (i=0; i<m; i++) { 513b01c7715SBarry Smith x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 514b01c7715SBarry Smith x[i2] = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3 + mdiag[12]*x4; 515b01c7715SBarry Smith x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3 + mdiag[13]*x4; 516b01c7715SBarry Smith x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4; 517b01c7715SBarry Smith x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4; 518b01c7715SBarry Smith mdiag += 16; 519b01c7715SBarry Smith i2 += 4; 520b01c7715SBarry Smith } 521dc0b31edSSatish Balay ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr); 522b01c7715SBarry Smith } else if (flag 
& SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 523d0f46423SBarry Smith ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 524b01c7715SBarry Smith } 525b01c7715SBarry Smith if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 526b01c7715SBarry Smith idiag = a->idiag+16*a->mbs - 16; 527b01c7715SBarry Smith i2 = 4*m - 4; 528b01c7715SBarry Smith x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; 529b01c7715SBarry Smith x[i2] = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3 + idiag[12]*x4; 530b01c7715SBarry Smith x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3 + idiag[13]*x4; 531b01c7715SBarry Smith x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4; 532b01c7715SBarry Smith x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4; 533b01c7715SBarry Smith idiag -= 16; 534b01c7715SBarry Smith i2 -= 4; 535b01c7715SBarry Smith for (i=m-2; i>=0; i--) { 536b01c7715SBarry Smith v = aa + 16*(diag[i]+1); 537b01c7715SBarry Smith vi = aj + diag[i] + 1; 538b01c7715SBarry Smith nz = ai[i+1] - diag[i] - 1; 539b01c7715SBarry Smith s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; 540b01c7715SBarry Smith while (nz--) { 541b01c7715SBarry Smith idx = 4*(*vi++); 542b01c7715SBarry Smith x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; 543b01c7715SBarry Smith s1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 544b01c7715SBarry Smith s2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 545b01c7715SBarry Smith s3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 546b01c7715SBarry Smith s4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 547b01c7715SBarry Smith v += 16; 548b01c7715SBarry Smith } 549b01c7715SBarry Smith x[i2] = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3 + idiag[12]*s4; 550b01c7715SBarry Smith x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3 + idiag[13]*s4; 551b01c7715SBarry Smith x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4; 552b01c7715SBarry Smith x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + 
idiag[15]*s4; 553b01c7715SBarry Smith idiag -= 16; 554b01c7715SBarry Smith i2 -= 4; 555b01c7715SBarry Smith } 556dc0b31edSSatish Balay ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr); 557b01c7715SBarry Smith } 558b01c7715SBarry Smith } else { 559634064b4SBarry Smith SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 560b01c7715SBarry Smith } 5611ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 5621ebc52fbSHong Zhang ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 563b01c7715SBarry Smith PetscFunctionReturn(0); 564b01c7715SBarry Smith } 565b01c7715SBarry Smith 566b01c7715SBarry Smith #undef __FUNCT__ 567b01c7715SBarry Smith #define __FUNCT__ "MatPBRelax_SeqBAIJ_5" 568c1ac3661SBarry Smith PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 569b01c7715SBarry Smith { 570b01c7715SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 571b01c7715SBarry Smith PetscScalar *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5; 572dd6ea824SBarry Smith const MatScalar *v,*aa = a->a, *idiag,*mdiag; 573dd6ea824SBarry Smith const PetscScalar *b; 574dfbe8321SBarry Smith PetscErrorCode ierr; 575c1ac3661SBarry Smith PetscInt m = a->mbs,i,i2,nz,idx; 576c1ac3661SBarry Smith const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 577b01c7715SBarry Smith 578b01c7715SBarry Smith PetscFunctionBegin; 57971f1c65dSBarry Smith if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 580b01c7715SBarry Smith its = its*lits; 58177431f27SBarry Smith if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 582b01c7715SBarry Smith if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 583b01c7715SBarry Smith if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 58471f1c65dSBarry Smith if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) 
SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 585b01c7715SBarry Smith if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 586b01c7715SBarry Smith 587b01c7715SBarry Smith if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 588b01c7715SBarry Smith 589b01c7715SBarry Smith diag = a->diag; 590b01c7715SBarry Smith idiag = a->idiag; 5911ebc52fbSHong Zhang ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 5921ebc52fbSHong Zhang ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 593b01c7715SBarry Smith 594b01c7715SBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 595b01c7715SBarry Smith if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 596b01c7715SBarry Smith x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20]; 597b01c7715SBarry Smith x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21]; 598b01c7715SBarry Smith x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22]; 599b01c7715SBarry Smith x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23]; 600b01c7715SBarry Smith x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24]; 601b01c7715SBarry Smith i2 = 5; 602b01c7715SBarry Smith idiag += 25; 603b01c7715SBarry Smith for (i=1; i<m; i++) { 604b01c7715SBarry Smith v = aa + 25*ai[i]; 605b01c7715SBarry Smith vi = aj + ai[i]; 606b01c7715SBarry Smith nz = diag[i] - ai[i]; 607b01c7715SBarry Smith s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; 608b01c7715SBarry Smith while (nz--) { 609b01c7715SBarry Smith idx = 5*(*vi++); 610b01c7715SBarry Smith x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 611b01c7715SBarry Smith s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 612b01c7715SBarry Smith s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + 
v[21]*x5; 613b01c7715SBarry Smith s3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 614b01c7715SBarry Smith s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 615b01c7715SBarry Smith s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 616b01c7715SBarry Smith v += 25; 617b01c7715SBarry Smith } 618b01c7715SBarry Smith x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 619b01c7715SBarry Smith x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 620b01c7715SBarry Smith x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 621b01c7715SBarry Smith x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 622b01c7715SBarry Smith x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 623b01c7715SBarry Smith idiag += 25; 624b01c7715SBarry Smith i2 += 5; 625b01c7715SBarry Smith } 626b01c7715SBarry Smith /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 627dc0b31edSSatish Balay ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 628b01c7715SBarry Smith } 629b01c7715SBarry Smith if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 630b01c7715SBarry Smith (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 631b01c7715SBarry Smith i2 = 0; 632b01c7715SBarry Smith mdiag = a->idiag+25*a->mbs; 633b01c7715SBarry Smith for (i=0; i<m; i++) { 634b01c7715SBarry Smith x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 635b01c7715SBarry Smith x[i2] = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5; 636b01c7715SBarry Smith x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5; 637b01c7715SBarry Smith x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5; 638b01c7715SBarry Smith x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5; 639b01c7715SBarry Smith 
x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5; 640b01c7715SBarry Smith mdiag += 25; 641b01c7715SBarry Smith i2 += 5; 642b01c7715SBarry Smith } 643dc0b31edSSatish Balay ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr); 644b01c7715SBarry Smith } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 645d0f46423SBarry Smith ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 646b01c7715SBarry Smith } 647b01c7715SBarry Smith if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 648b01c7715SBarry Smith idiag = a->idiag+25*a->mbs - 25; 649b01c7715SBarry Smith i2 = 5*m - 5; 650b01c7715SBarry Smith x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; 651b01c7715SBarry Smith x[i2] = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5; 652b01c7715SBarry Smith x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5; 653b01c7715SBarry Smith x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5; 654b01c7715SBarry Smith x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5; 655b01c7715SBarry Smith x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5; 656b01c7715SBarry Smith idiag -= 25; 657b01c7715SBarry Smith i2 -= 5; 658b01c7715SBarry Smith for (i=m-2; i>=0; i--) { 659b01c7715SBarry Smith v = aa + 25*(diag[i]+1); 660b01c7715SBarry Smith vi = aj + diag[i] + 1; 661b01c7715SBarry Smith nz = ai[i+1] - diag[i] - 1; 662b01c7715SBarry Smith s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; 663b01c7715SBarry Smith while (nz--) { 664b01c7715SBarry Smith idx = 5*(*vi++); 665b01c7715SBarry Smith x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; 666b01c7715SBarry Smith s1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 667b01c7715SBarry Smith s2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 668b01c7715SBarry Smith s3 
-= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 669b01c7715SBarry Smith s4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 670b01c7715SBarry Smith s5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 671b01c7715SBarry Smith v += 25; 672b01c7715SBarry Smith } 673b01c7715SBarry Smith x[i2] = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5; 674b01c7715SBarry Smith x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5; 675b01c7715SBarry Smith x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5; 676b01c7715SBarry Smith x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5; 677b01c7715SBarry Smith x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5; 678b01c7715SBarry Smith idiag -= 25; 679b01c7715SBarry Smith i2 -= 5; 680b01c7715SBarry Smith } 681dc0b31edSSatish Balay ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr); 682b01c7715SBarry Smith } 683b01c7715SBarry Smith } else { 684634064b4SBarry Smith SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 685b01c7715SBarry Smith } 6861ebc52fbSHong Zhang ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 6871ebc52fbSHong Zhang ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 688b01c7715SBarry Smith PetscFunctionReturn(0); 689b01c7715SBarry Smith } 690b01c7715SBarry Smith 6916d3beeddSMatthew Knepley #undef __FUNCT__ 6926d3beeddSMatthew Knepley #define __FUNCT__ "MatPBRelax_SeqBAIJ_6" 6936d3beeddSMatthew Knepley PetscErrorCode MatPBRelax_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 6946d3beeddSMatthew Knepley { 6956d3beeddSMatthew Knepley Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 6966d3beeddSMatthew Knepley PetscScalar *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6; 697dd6ea824SBarry Smith const MatScalar *v,*aa = a->a, *idiag,*mdiag; 698dd6ea824SBarry Smith const PetscScalar *b; 
6996d3beeddSMatthew Knepley PetscErrorCode ierr; 7006d3beeddSMatthew Knepley PetscInt m = a->mbs,i,i2,nz,idx; 7016d3beeddSMatthew Knepley const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 7026d3beeddSMatthew Knepley 7036d3beeddSMatthew Knepley PetscFunctionBegin; 7046d3beeddSMatthew Knepley if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 7056d3beeddSMatthew Knepley its = its*lits; 7066d3beeddSMatthew Knepley if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 7076d3beeddSMatthew Knepley if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 7086d3beeddSMatthew Knepley if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 7096d3beeddSMatthew Knepley if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 7106d3beeddSMatthew Knepley if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 7116d3beeddSMatthew Knepley 7126d3beeddSMatthew Knepley if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 7136d3beeddSMatthew Knepley 7146d3beeddSMatthew Knepley diag = a->diag; 7156d3beeddSMatthew Knepley idiag = a->idiag; 7166d3beeddSMatthew Knepley ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 7176d3beeddSMatthew Knepley ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 7186d3beeddSMatthew Knepley 7196d3beeddSMatthew Knepley if (flag & SOR_ZERO_INITIAL_GUESS) { 7206d3beeddSMatthew Knepley if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 7216d3beeddSMatthew Knepley x[0] = b[0]*idiag[0] + b[1]*idiag[6] + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30]; 7226d3beeddSMatthew Knepley x[1] = b[0]*idiag[1] + b[1]*idiag[7] + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31]; 7236d3beeddSMatthew Knepley x[2] = 
b[0]*idiag[2] + b[1]*idiag[8] + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32]; 7246d3beeddSMatthew Knepley x[3] = b[0]*idiag[3] + b[1]*idiag[9] + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33]; 7256d3beeddSMatthew Knepley x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34]; 7266d3beeddSMatthew Knepley x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35]; 7276d3beeddSMatthew Knepley i2 = 6; 7286d3beeddSMatthew Knepley idiag += 36; 7296d3beeddSMatthew Knepley for (i=1; i<m; i++) { 7306d3beeddSMatthew Knepley v = aa + 36*ai[i]; 7316d3beeddSMatthew Knepley vi = aj + ai[i]; 7326d3beeddSMatthew Knepley nz = diag[i] - ai[i]; 7336d3beeddSMatthew Knepley s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; 7346d3beeddSMatthew Knepley while (nz--) { 7356d3beeddSMatthew Knepley idx = 6*(*vi++); 7366d3beeddSMatthew Knepley x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 7376d3beeddSMatthew Knepley s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 7386d3beeddSMatthew Knepley s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 7396d3beeddSMatthew Knepley s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 7406d3beeddSMatthew Knepley s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 7416d3beeddSMatthew Knepley s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 7426d3beeddSMatthew Knepley s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 7436d3beeddSMatthew Knepley v += 36; 7446d3beeddSMatthew Knepley } 7456d3beeddSMatthew Knepley x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 7466d3beeddSMatthew Knepley x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 
7476d3beeddSMatthew Knepley x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 7486d3beeddSMatthew Knepley x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 7496d3beeddSMatthew Knepley x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 7506d3beeddSMatthew Knepley x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 7516d3beeddSMatthew Knepley idiag += 36; 7526d3beeddSMatthew Knepley i2 += 6; 7536d3beeddSMatthew Knepley } 7546d3beeddSMatthew Knepley /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 755dc0b31edSSatish Balay ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 7566d3beeddSMatthew Knepley } 7576d3beeddSMatthew Knepley if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 7586d3beeddSMatthew Knepley (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 7596d3beeddSMatthew Knepley i2 = 0; 7606d3beeddSMatthew Knepley mdiag = a->idiag+36*a->mbs; 7616d3beeddSMatthew Knepley for (i=0; i<m; i++) { 7626d3beeddSMatthew Knepley x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 7636d3beeddSMatthew Knepley x[i2] = mdiag[0]*x1 + mdiag[6]*x2 + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6; 7646d3beeddSMatthew Knepley x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2 + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6; 7656d3beeddSMatthew Knepley x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2 + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6; 7666d3beeddSMatthew Knepley x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2 + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6; 7676d3beeddSMatthew Knepley x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6; 7686d3beeddSMatthew Knepley x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + 
mdiag[29]*x5 + mdiag[35]*x6; 7696d3beeddSMatthew Knepley mdiag += 36; 7706d3beeddSMatthew Knepley i2 += 6; 7716d3beeddSMatthew Knepley } 772dc0b31edSSatish Balay ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr); 7736d3beeddSMatthew Knepley } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 774d0f46423SBarry Smith ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 7756d3beeddSMatthew Knepley } 7766d3beeddSMatthew Knepley if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 7776d3beeddSMatthew Knepley idiag = a->idiag+36*a->mbs - 36; 7786d3beeddSMatthew Knepley i2 = 6*m - 6; 7796d3beeddSMatthew Knepley x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; 7806d3beeddSMatthew Knepley x[i2] = idiag[0]*x1 + idiag[6]*x2 + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6; 7816d3beeddSMatthew Knepley x[i2+1] = idiag[1]*x1 + idiag[7]*x2 + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6; 7826d3beeddSMatthew Knepley x[i2+2] = idiag[2]*x1 + idiag[8]*x2 + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6; 7836d3beeddSMatthew Knepley x[i2+3] = idiag[3]*x1 + idiag[9]*x2 + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6; 7846d3beeddSMatthew Knepley x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6; 7856d3beeddSMatthew Knepley x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6; 7866d3beeddSMatthew Knepley idiag -= 36; 7876d3beeddSMatthew Knepley i2 -= 6; 7886d3beeddSMatthew Knepley for (i=m-2; i>=0; i--) { 7896d3beeddSMatthew Knepley v = aa + 36*(diag[i]+1); 7906d3beeddSMatthew Knepley vi = aj + diag[i] + 1; 7916d3beeddSMatthew Knepley nz = ai[i+1] - diag[i] - 1; 7926d3beeddSMatthew Knepley s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; 7936d3beeddSMatthew Knepley while (nz--) { 7946d3beeddSMatthew Knepley idx = 6*(*vi++); 7956d3beeddSMatthew 
Knepley x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; 7966d3beeddSMatthew Knepley s1 -= v[0]*x1 + v[6]*x2 + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6; 7976d3beeddSMatthew Knepley s2 -= v[1]*x1 + v[7]*x2 + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6; 7986d3beeddSMatthew Knepley s3 -= v[2]*x1 + v[8]*x2 + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6; 7996d3beeddSMatthew Knepley s4 -= v[3]*x1 + v[9]*x2 + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6; 8006d3beeddSMatthew Knepley s5 -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6; 8016d3beeddSMatthew Knepley s6 -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6; 8026d3beeddSMatthew Knepley v += 36; 8036d3beeddSMatthew Knepley } 8046d3beeddSMatthew Knepley x[i2] = idiag[0]*s1 + idiag[6]*s2 + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6; 8056d3beeddSMatthew Knepley x[i2+1] = idiag[1]*s1 + idiag[7]*s2 + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6; 8066d3beeddSMatthew Knepley x[i2+2] = idiag[2]*s1 + idiag[8]*s2 + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6; 8076d3beeddSMatthew Knepley x[i2+3] = idiag[3]*s1 + idiag[9]*s2 + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6; 8086d3beeddSMatthew Knepley x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6; 8096d3beeddSMatthew Knepley x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6; 8106d3beeddSMatthew Knepley idiag -= 36; 8116d3beeddSMatthew Knepley i2 -= 6; 8126d3beeddSMatthew Knepley } 813dc0b31edSSatish Balay ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr); 8146d3beeddSMatthew Knepley } 8156d3beeddSMatthew Knepley } else { 8166d3beeddSMatthew Knepley SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 8176d3beeddSMatthew Knepley } 8186d3beeddSMatthew Knepley ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 8196d3beeddSMatthew Knepley ierr = 
VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 8206d3beeddSMatthew Knepley PetscFunctionReturn(0); 8216d3beeddSMatthew Knepley } 8226d3beeddSMatthew Knepley 8236d3beeddSMatthew Knepley #undef __FUNCT__ 8246d3beeddSMatthew Knepley #define __FUNCT__ "MatPBRelax_SeqBAIJ_7" 8256d3beeddSMatthew Knepley PetscErrorCode MatPBRelax_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 8266d3beeddSMatthew Knepley { 8276d3beeddSMatthew Knepley Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 8286d3beeddSMatthew Knepley PetscScalar *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7; 829dd6ea824SBarry Smith const MatScalar *v,*aa = a->a, *idiag,*mdiag; 830dd6ea824SBarry Smith const PetscScalar *b; 8316d3beeddSMatthew Knepley PetscErrorCode ierr; 8326d3beeddSMatthew Knepley PetscInt m = a->mbs,i,i2,nz,idx; 8336d3beeddSMatthew Knepley const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 8346d3beeddSMatthew Knepley 8356d3beeddSMatthew Knepley PetscFunctionBegin; 8366d3beeddSMatthew Knepley if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 8376d3beeddSMatthew Knepley its = its*lits; 8386d3beeddSMatthew Knepley if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 8396d3beeddSMatthew Knepley if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 8406d3beeddSMatthew Knepley if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 8416d3beeddSMatthew Knepley if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 8426d3beeddSMatthew Knepley if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations"); 8436d3beeddSMatthew Knepley 8446d3beeddSMatthew Knepley if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);} 8456d3beeddSMatthew Knepley 
8466d3beeddSMatthew Knepley diag = a->diag; 8476d3beeddSMatthew Knepley idiag = a->idiag; 8486d3beeddSMatthew Knepley ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 8496d3beeddSMatthew Knepley ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 8506d3beeddSMatthew Knepley 8516d3beeddSMatthew Knepley if (flag & SOR_ZERO_INITIAL_GUESS) { 8526d3beeddSMatthew Knepley if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 8536d3beeddSMatthew Knepley x[0] = b[0]*idiag[0] + b[1]*idiag[7] + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42]; 8546d3beeddSMatthew Knepley x[1] = b[0]*idiag[1] + b[1]*idiag[8] + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43]; 8556d3beeddSMatthew Knepley x[2] = b[0]*idiag[2] + b[1]*idiag[9] + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44]; 8566d3beeddSMatthew Knepley x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45]; 8576d3beeddSMatthew Knepley x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46]; 8586d3beeddSMatthew Knepley x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47]; 8596d3beeddSMatthew Knepley x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48]; 8606d3beeddSMatthew Knepley i2 = 7; 8616d3beeddSMatthew Knepley idiag += 49; 8626d3beeddSMatthew Knepley for (i=1; i<m; i++) { 8636d3beeddSMatthew Knepley v = aa + 49*ai[i]; 8646d3beeddSMatthew Knepley vi = aj + ai[i]; 8656d3beeddSMatthew Knepley nz = diag[i] - ai[i]; 8666d3beeddSMatthew Knepley s1 = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6]; 8676d3beeddSMatthew Knepley while (nz--) { 8686d3beeddSMatthew Knepley idx = 7*(*vi++); 8696d3beeddSMatthew Knepley 
x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 8706d3beeddSMatthew Knepley s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 8716d3beeddSMatthew Knepley s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 8726d3beeddSMatthew Knepley s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 8736d3beeddSMatthew Knepley s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 8746d3beeddSMatthew Knepley s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 8756d3beeddSMatthew Knepley s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 8766d3beeddSMatthew Knepley s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 8776d3beeddSMatthew Knepley v += 49; 8786d3beeddSMatthew Knepley } 8796d3beeddSMatthew Knepley x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 8806d3beeddSMatthew Knepley x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 8816d3beeddSMatthew Knepley x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 8826d3beeddSMatthew Knepley x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 8836d3beeddSMatthew Knepley x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 8846d3beeddSMatthew Knepley x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 8856d3beeddSMatthew Knepley x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 8866d3beeddSMatthew Knepley idiag += 49; 8876d3beeddSMatthew Knepley i2 += 7; 
8886d3beeddSMatthew Knepley } 8896d3beeddSMatthew Knepley /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 890dc0b31edSSatish Balay ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 8916d3beeddSMatthew Knepley } 8926d3beeddSMatthew Knepley if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) && 8936d3beeddSMatthew Knepley (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) { 8946d3beeddSMatthew Knepley i2 = 0; 8956d3beeddSMatthew Knepley mdiag = a->idiag+49*a->mbs; 8966d3beeddSMatthew Knepley for (i=0; i<m; i++) { 8976d3beeddSMatthew Knepley x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 8986d3beeddSMatthew Knepley x[i2] = mdiag[0]*x1 + mdiag[7]*x2 + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[35]*x7; 8996d3beeddSMatthew Knepley x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2 + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[36]*x7; 9006d3beeddSMatthew Knepley x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2 + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[37]*x7; 9016d3beeddSMatthew Knepley x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[38]*x7; 9026d3beeddSMatthew Knepley x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[39]*x7; 9036d3beeddSMatthew Knepley x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[40]*x7; 9046d3beeddSMatthew Knepley x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[41]*x7; 9056d3beeddSMatthew Knepley mdiag += 36; 9066d3beeddSMatthew Knepley i2 += 6; 9076d3beeddSMatthew Knepley } 908dc0b31edSSatish Balay ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr); 9096d3beeddSMatthew Knepley } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 910d0f46423SBarry Smith ierr = 
PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr); 9116d3beeddSMatthew Knepley } 9126d3beeddSMatthew Knepley if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 9136d3beeddSMatthew Knepley idiag = a->idiag+49*a->mbs - 49; 9146d3beeddSMatthew Knepley i2 = 7*m - 7; 9156d3beeddSMatthew Knepley x1 = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6]; 9166d3beeddSMatthew Knepley x[i2] = idiag[0]*x1 + idiag[7]*x2 + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7; 9176d3beeddSMatthew Knepley x[i2+1] = idiag[1]*x1 + idiag[8]*x2 + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7; 9186d3beeddSMatthew Knepley x[i2+2] = idiag[2]*x1 + idiag[9]*x2 + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7; 9196d3beeddSMatthew Knepley x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7; 9206d3beeddSMatthew Knepley x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7; 9216d3beeddSMatthew Knepley x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7; 9226d3beeddSMatthew Knepley x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7; 9236d3beeddSMatthew Knepley idiag -= 49; 9246d3beeddSMatthew Knepley i2 -= 7; 9256d3beeddSMatthew Knepley for (i=m-2; i>=0; i--) { 9266d3beeddSMatthew Knepley v = aa + 49*(diag[i]+1); 9276d3beeddSMatthew Knepley vi = aj + diag[i] + 1; 9286d3beeddSMatthew Knepley nz = ai[i+1] - diag[i] - 1; 9296d3beeddSMatthew Knepley s1 = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6]; 9306d3beeddSMatthew Knepley while (nz--) { 9316d3beeddSMatthew Knepley idx = 7*(*vi++); 9326d3beeddSMatthew Knepley x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = 
x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx]; 9336d3beeddSMatthew Knepley s1 -= v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 9346d3beeddSMatthew Knepley s2 -= v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 9356d3beeddSMatthew Knepley s3 -= v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 9366d3beeddSMatthew Knepley s4 -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 9376d3beeddSMatthew Knepley s5 -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 9386d3beeddSMatthew Knepley s6 -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 9396d3beeddSMatthew Knepley s7 -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 9406d3beeddSMatthew Knepley v += 49; 9416d3beeddSMatthew Knepley } 9426d3beeddSMatthew Knepley x[i2] = idiag[0]*s1 + idiag[7]*s2 + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7; 9436d3beeddSMatthew Knepley x[i2+1] = idiag[1]*s1 + idiag[8]*s2 + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7; 9446d3beeddSMatthew Knepley x[i2+2] = idiag[2]*s1 + idiag[9]*s2 + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7; 9456d3beeddSMatthew Knepley x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7; 9466d3beeddSMatthew Knepley x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7; 9476d3beeddSMatthew Knepley x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7; 9486d3beeddSMatthew Knepley x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7; 9496d3beeddSMatthew Knepley idiag -= 49; 9506d3beeddSMatthew Knepley i2 -= 7; 9516d3beeddSMatthew Knepley } 952dc0b31edSSatish Balay ierr = 
PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr); 9536d3beeddSMatthew Knepley } 9546d3beeddSMatthew Knepley } else { 9556d3beeddSMatthew Knepley SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess"); 9566d3beeddSMatthew Knepley } 9576d3beeddSMatthew Knepley ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 9586d3beeddSMatthew Knepley ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 9596d3beeddSMatthew Knepley PetscFunctionReturn(0); 9606d3beeddSMatthew Knepley } 9616d3beeddSMatthew Knepley 962af674e45SBarry Smith /* 96381824310SBarry Smith Special version for direct calls from Fortran (Used in PETSc-fun3d) 964af674e45SBarry Smith */ 965af674e45SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS) 966af674e45SBarry Smith #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 967af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 968af674e45SBarry Smith #define matsetvaluesblocked4_ matsetvaluesblocked4 969af674e45SBarry Smith #endif 970af674e45SBarry Smith 9719c8c1041SBarry Smith EXTERN_C_BEGIN 972af674e45SBarry Smith #undef __FUNCT__ 973af674e45SBarry Smith #define __FUNCT__ "matsetvaluesblocked4_" 974be1d678aSKris Buschelman void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 975af674e45SBarry Smith { 976af674e45SBarry Smith Mat A = *AA; 977af674e45SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 978c1ac3661SBarry Smith PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 979c1ac3661SBarry Smith PetscInt *ai=a->i,*ailen=a->ilen; 98017ec6a02SBarry Smith PetscInt *aj=a->j,stepval,lastcol = -1; 981f15d580aSBarry Smith const PetscScalar *value = v; 9824bb09213Spetsc MatScalar *ap,*aa = a->a,*bap; 983af674e45SBarry Smith 984af674e45SBarry Smith PetscFunctionBegin; 985d0f46423SBarry Smith if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 986af674e45SBarry 
Smith stepval = (n-1)*4; 987af674e45SBarry Smith for (k=0; k<m; k++) { /* loop over added rows */ 988af674e45SBarry Smith row = im[k]; 989af674e45SBarry Smith rp = aj + ai[row]; 990af674e45SBarry Smith ap = aa + 16*ai[row]; 991af674e45SBarry Smith nrow = ailen[row]; 992af674e45SBarry Smith low = 0; 99317ec6a02SBarry Smith high = nrow; 994af674e45SBarry Smith for (l=0; l<n; l++) { /* loop over added columns */ 995af674e45SBarry Smith col = in[l]; 99617ec6a02SBarry Smith if (col <= lastcol) low = 0; else high = nrow; 99717ec6a02SBarry Smith lastcol = col; 9981e3347e8SBarry Smith value = v + k*(stepval+4 + l)*4; 999af674e45SBarry Smith while (high-low > 7) { 1000af674e45SBarry Smith t = (low+high)/2; 1001af674e45SBarry Smith if (rp[t] > col) high = t; 1002af674e45SBarry Smith else low = t; 1003af674e45SBarry Smith } 1004af674e45SBarry Smith for (i=low; i<high; i++) { 1005af674e45SBarry Smith if (rp[i] > col) break; 1006af674e45SBarry Smith if (rp[i] == col) { 1007af674e45SBarry Smith bap = ap + 16*i; 1008af674e45SBarry Smith for (ii=0; ii<4; ii++,value+=stepval) { 1009af674e45SBarry Smith for (jj=ii; jj<16; jj+=4) { 1010af674e45SBarry Smith bap[jj] += *value++; 1011af674e45SBarry Smith } 1012af674e45SBarry Smith } 1013af674e45SBarry Smith goto noinsert2; 1014af674e45SBarry Smith } 1015af674e45SBarry Smith } 1016af674e45SBarry Smith N = nrow++ - 1; 101717ec6a02SBarry Smith high++; /* added new column index thus must search to one higher than before */ 1018af674e45SBarry Smith /* shift up all the later entries in this row */ 1019af674e45SBarry Smith for (ii=N; ii>=i; ii--) { 1020af674e45SBarry Smith rp[ii+1] = rp[ii]; 1021a037b02bSBarry Smith PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1022af674e45SBarry Smith } 1023af674e45SBarry Smith if (N >= i) { 1024a037b02bSBarry Smith PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1025af674e45SBarry Smith } 1026af674e45SBarry Smith rp[i] = col; 1027af674e45SBarry Smith bap = ap + 16*i; 1028af674e45SBarry Smith for 
(ii=0; ii<4; ii++,value+=stepval) { 1029af674e45SBarry Smith for (jj=ii; jj<16; jj+=4) { 1030af674e45SBarry Smith bap[jj] = *value++; 1031af674e45SBarry Smith } 1032af674e45SBarry Smith } 1033af674e45SBarry Smith noinsert2:; 1034af674e45SBarry Smith low = i; 1035af674e45SBarry Smith } 1036af674e45SBarry Smith ailen[row] = nrow; 1037af674e45SBarry Smith } 1038be1d678aSKris Buschelman PetscFunctionReturnVoid(); 1039af674e45SBarry Smith } 10409c8c1041SBarry Smith EXTERN_C_END 1041af674e45SBarry Smith 1042af674e45SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS) 1043af674e45SBarry Smith #define matsetvalues4_ MATSETVALUES4 1044af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1045af674e45SBarry Smith #define matsetvalues4_ matsetvalues4 1046af674e45SBarry Smith #endif 1047af674e45SBarry Smith 10489c8c1041SBarry Smith EXTERN_C_BEGIN 1049af674e45SBarry Smith #undef __FUNCT__ 1050af674e45SBarry Smith #define __FUNCT__ "MatSetValues4_" 1051be1d678aSKris Buschelman void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 1052af674e45SBarry Smith { 1053af674e45SBarry Smith Mat A = *AA; 1054af674e45SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1055c1ac3661SBarry Smith PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 1056c1ac3661SBarry Smith PetscInt *ai=a->i,*ailen=a->ilen; 1057c1ac3661SBarry Smith PetscInt *aj=a->j,brow,bcol; 105817ec6a02SBarry Smith PetscInt ridx,cidx,lastcol = -1; 1059af674e45SBarry Smith MatScalar *ap,value,*aa=a->a,*bap; 1060af674e45SBarry Smith 1061af674e45SBarry Smith PetscFunctionBegin; 1062af674e45SBarry Smith for (k=0; k<m; k++) { /* loop over added rows */ 1063af674e45SBarry Smith row = im[k]; brow = row/4; 1064af674e45SBarry Smith rp = aj + ai[brow]; 1065af674e45SBarry Smith ap = aa + 16*ai[brow]; 1066af674e45SBarry Smith nrow = ailen[brow]; 1067af674e45SBarry Smith low = 0; 106817ec6a02SBarry Smith high = nrow; 1069af674e45SBarry Smith for (l=0; l<n; 
l++) { /* loop over added columns */ 1070af674e45SBarry Smith col = in[l]; bcol = col/4; 1071af674e45SBarry Smith ridx = row % 4; cidx = col % 4; 1072af674e45SBarry Smith value = v[l + k*n]; 107317ec6a02SBarry Smith if (col <= lastcol) low = 0; else high = nrow; 107417ec6a02SBarry Smith lastcol = col; 1075af674e45SBarry Smith while (high-low > 7) { 1076af674e45SBarry Smith t = (low+high)/2; 1077af674e45SBarry Smith if (rp[t] > bcol) high = t; 1078af674e45SBarry Smith else low = t; 1079af674e45SBarry Smith } 1080af674e45SBarry Smith for (i=low; i<high; i++) { 1081af674e45SBarry Smith if (rp[i] > bcol) break; 1082af674e45SBarry Smith if (rp[i] == bcol) { 1083af674e45SBarry Smith bap = ap + 16*i + 4*cidx + ridx; 1084af674e45SBarry Smith *bap += value; 1085af674e45SBarry Smith goto noinsert1; 1086af674e45SBarry Smith } 1087af674e45SBarry Smith } 1088af674e45SBarry Smith N = nrow++ - 1; 108917ec6a02SBarry Smith high++; /* added new column thus must search to one higher than before */ 1090af674e45SBarry Smith /* shift up all the later entries in this row */ 1091af674e45SBarry Smith for (ii=N; ii>=i; ii--) { 1092af674e45SBarry Smith rp[ii+1] = rp[ii]; 1093a037b02bSBarry Smith PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1094af674e45SBarry Smith } 1095af674e45SBarry Smith if (N>=i) { 1096a037b02bSBarry Smith PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1097af674e45SBarry Smith } 1098af674e45SBarry Smith rp[i] = bcol; 1099af674e45SBarry Smith ap[16*i + 4*cidx + ridx] = value; 1100af674e45SBarry Smith noinsert1:; 1101af674e45SBarry Smith low = i; 1102af674e45SBarry Smith } 1103af674e45SBarry Smith ailen[brow] = nrow; 1104af674e45SBarry Smith } 1105be1d678aSKris Buschelman PetscFunctionReturnVoid(); 1106af674e45SBarry Smith } 11079c8c1041SBarry Smith EXTERN_C_END 1108af674e45SBarry Smith 1109be5855fcSBarry Smith /* 1110be5855fcSBarry Smith Checks for missing diagonals 1111be5855fcSBarry Smith */ 11124a2ae208SSatish Balay #undef __FUNCT__ 11134a2ae208SSatish Balay 
#define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 11142af78befSBarry Smith PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d) 1115be5855fcSBarry Smith { 1116be5855fcSBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 11176849ba73SBarry Smith PetscErrorCode ierr; 1118c1ac3661SBarry Smith PetscInt *diag,*jj = a->j,i; 1119be5855fcSBarry Smith 1120be5855fcSBarry Smith PetscFunctionBegin; 1121c4992f7dSBarry Smith ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 11222af78befSBarry Smith *missing = PETSC_FALSE; 11232efa7f71SHong Zhang if (A->rmap->n > 0 && !jj) { 11242efa7f71SHong Zhang *missing = PETSC_TRUE; 11252efa7f71SHong Zhang if (d) *d = 0; 11262efa7f71SHong Zhang PetscInfo(A,"Matrix has no entries therefor is missing diagonal"); 11272efa7f71SHong Zhang } else { 1128883fce79SBarry Smith diag = a->diag; 11290e8e8aceSBarry Smith for (i=0; i<a->mbs; i++) { 1130be5855fcSBarry Smith if (jj[diag[i]] != i) { 11312af78befSBarry Smith *missing = PETSC_TRUE; 11322af78befSBarry Smith if (d) *d = i; 11332efa7f71SHong Zhang PetscInfo1(A,"Matrix is missing block diagonal number %D",i); 11342efa7f71SHong Zhang } 1135be5855fcSBarry Smith } 1136be5855fcSBarry Smith } 1137be5855fcSBarry Smith PetscFunctionReturn(0); 1138be5855fcSBarry Smith } 1139be5855fcSBarry Smith 11404a2ae208SSatish Balay #undef __FUNCT__ 11414a2ae208SSatish Balay #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1142dfbe8321SBarry Smith PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1143de6a44a3SBarry Smith { 1144de6a44a3SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 11456849ba73SBarry Smith PetscErrorCode ierr; 114609f38230SBarry Smith PetscInt i,j,m = a->mbs; 1147de6a44a3SBarry Smith 11483a40ed3dSBarry Smith PetscFunctionBegin; 114909f38230SBarry Smith if (!a->diag) { 115009f38230SBarry Smith ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 115109f38230SBarry Smith } 11527fc0212eSBarry Smith for (i=0; i<m; i++) { 115309f38230SBarry Smith a->diag[i] = a->i[i+1]; 
1154de6a44a3SBarry Smith for (j=a->i[i]; j<a->i[i+1]; j++) { 1155de6a44a3SBarry Smith if (a->j[j] == i) { 115609f38230SBarry Smith a->diag[i] = j; 1157de6a44a3SBarry Smith break; 1158de6a44a3SBarry Smith } 1159de6a44a3SBarry Smith } 1160de6a44a3SBarry Smith } 11613a40ed3dSBarry Smith PetscFunctionReturn(0); 1162de6a44a3SBarry Smith } 11632593348eSBarry Smith 11642593348eSBarry Smith 1165690b6cddSBarry Smith EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**); 11663b2fbd54SBarry Smith 11674a2ae208SSatish Balay #undef __FUNCT__ 11684a2ae208SSatish Balay #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 11698f7157efSSatish Balay static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 11703b2fbd54SBarry Smith { 11713b2fbd54SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1172dfbe8321SBarry Smith PetscErrorCode ierr; 1173d0f46423SBarry Smith PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,nbs = 1,k,l,cnt; 11748f7157efSSatish Balay PetscInt *tia, *tja; 11753b2fbd54SBarry Smith 11763a40ed3dSBarry Smith PetscFunctionBegin; 11773b2fbd54SBarry Smith *nn = n; 11783a40ed3dSBarry Smith if (!ia) PetscFunctionReturn(0); 11793b2fbd54SBarry Smith if (symmetric) { 11808f7157efSSatish Balay ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 11813b2fbd54SBarry Smith } else { 11828f7157efSSatish Balay tia = a->i; tja = a->j; 11833b2fbd54SBarry Smith } 11843b2fbd54SBarry Smith 1185ecc77c7aSBarry Smith if (!blockcompressed && bs > 1) { 1186ecc77c7aSBarry Smith (*nn) *= bs; 1187ecc77c7aSBarry Smith nbs = bs; 11888f7157efSSatish Balay /* malloc & create the natural set of indices */ 1189ecc77c7aSBarry Smith ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr); 11909985e31cSBarry Smith if (n) { 1191ecc77c7aSBarry Smith (*ia)[0] = 0; 1192ecc77c7aSBarry Smith for (j=1; j<bs; j++) { 
1193ecc77c7aSBarry Smith (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1194ecc77c7aSBarry Smith } 11959985e31cSBarry Smith } 1196ecc77c7aSBarry Smith 1197ecc77c7aSBarry Smith for (i=1; i<n; i++) { 1198ecc77c7aSBarry Smith (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1199ecc77c7aSBarry Smith for (j=1; j<bs; j++) { 1200ecc77c7aSBarry Smith (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 12018f7157efSSatish Balay } 12028f7157efSSatish Balay } 12039985e31cSBarry Smith if (n) { 1204ecc77c7aSBarry Smith (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 12059985e31cSBarry Smith } 1206ecc77c7aSBarry Smith 12079985e31cSBarry Smith if (ja) { 12089985e31cSBarry Smith ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr); 12099985e31cSBarry Smith cnt = 0; 12109985e31cSBarry Smith for (i=0; i<n; i++) { 12119985e31cSBarry Smith for (j=0; j<bs; j++) { 12129985e31cSBarry Smith for (k=tia[i]; k<tia[i+1]; k++) { 12139985e31cSBarry Smith for (l=0; l<bs; l++) { 12149985e31cSBarry Smith (*ja)[cnt++] = bs*tja[k] + l; 12159985e31cSBarry Smith } 12169985e31cSBarry Smith } 12179985e31cSBarry Smith } 12189985e31cSBarry Smith } 12199985e31cSBarry Smith } 12209985e31cSBarry Smith 1221ecc77c7aSBarry Smith n *= bs; 1222ecc77c7aSBarry Smith nz *= bs*bs; 12238f7157efSSatish Balay if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 12248f7157efSSatish Balay ierr = PetscFree(tia);CHKERRQ(ierr); 12258f7157efSSatish Balay ierr = PetscFree(tja);CHKERRQ(ierr); 12268f7157efSSatish Balay } 1227*f6d58c54SBarry Smith } else if (oshift == 1) { 1228*f6d58c54SBarry Smith PetscInt nz = a->i[A->rmap->n/bs]; 1229*f6d58c54SBarry Smith /* malloc space and add 1 to i and j indices */ 1230*f6d58c54SBarry Smith ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr); 1231*f6d58c54SBarry Smith for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1232*f6d58c54SBarry Smith if (ja) { 1233*f6d58c54SBarry Smith ierr = 
PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr); 1234*f6d58c54SBarry Smith for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1235*f6d58c54SBarry Smith } 12368f7157efSSatish Balay } else { 12378f7157efSSatish Balay *ia = tia; 1238ecc77c7aSBarry Smith if (ja) *ja = tja; 12398f7157efSSatish Balay } 1240*f6d58c54SBarry Smith 12413a40ed3dSBarry Smith PetscFunctionReturn(0); 12423b2fbd54SBarry Smith } 12433b2fbd54SBarry Smith 12444a2ae208SSatish Balay #undef __FUNCT__ 12454a2ae208SSatish Balay #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 12468f7157efSSatish Balay static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 12473b2fbd54SBarry Smith { 12486849ba73SBarry Smith PetscErrorCode ierr; 12493b2fbd54SBarry Smith 12503a40ed3dSBarry Smith PetscFunctionBegin; 12513a40ed3dSBarry Smith if (!ia) PetscFunctionReturn(0); 1252d0f46423SBarry Smith if (!blockcompressed && A->rmap->bs > 1) { 1253ecc77c7aSBarry Smith ierr = PetscFree(*ia);CHKERRQ(ierr); 12549985e31cSBarry Smith if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1255*f6d58c54SBarry Smith } else if (symmetric || oshift == 1) { 1256606d414cSSatish Balay ierr = PetscFree(*ia);CHKERRQ(ierr); 12579985e31cSBarry Smith if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 12583b2fbd54SBarry Smith } 12593a40ed3dSBarry Smith PetscFunctionReturn(0); 12603b2fbd54SBarry Smith } 12613b2fbd54SBarry Smith 12624a2ae208SSatish Balay #undef __FUNCT__ 12634a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_SeqBAIJ" 1264dfbe8321SBarry Smith PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 12652d61bbb3SSatish Balay { 12662d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1267dfbe8321SBarry Smith PetscErrorCode ierr; 12682d61bbb3SSatish Balay 1269433994e6SBarry Smith PetscFunctionBegin; 1270aa482453SBarry Smith #if defined(PETSC_USE_LOG) 1271d0f46423SBarry Smith PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, 
NZ=%D",A->rmap->N,A->cmap->n,a->nz); 12722d61bbb3SSatish Balay #endif 1273e6b907acSBarry Smith ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1274c38d4ed2SBarry Smith if (a->row) { 1275c38d4ed2SBarry Smith ierr = ISDestroy(a->row);CHKERRQ(ierr); 1276c38d4ed2SBarry Smith } 1277c38d4ed2SBarry Smith if (a->col) { 1278c38d4ed2SBarry Smith ierr = ISDestroy(a->col);CHKERRQ(ierr); 1279c38d4ed2SBarry Smith } 128005b42c5fSBarry Smith ierr = PetscFree(a->diag);CHKERRQ(ierr); 128105b42c5fSBarry Smith ierr = PetscFree(a->idiag);CHKERRQ(ierr); 128205b42c5fSBarry Smith ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 128305b42c5fSBarry Smith ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 128405b42c5fSBarry Smith ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1285e51c0b9cSSatish Balay if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 128605b42c5fSBarry Smith ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 128705b42c5fSBarry Smith ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 128873e7a558SHong Zhang if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);} 1289c4319e64SHong Zhang 129070e08fbdSBarry Smith if (a->sbaijMat) {ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);} 1291606d414cSSatish Balay ierr = PetscFree(a);CHKERRQ(ierr); 1292901853e0SKris Buschelman 1293dbd8c25aSHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 129443516a2dSKris Buschelman ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr); 1295901853e0SKris Buschelman ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1296901853e0SKris Buschelman ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1297901853e0SKris Buschelman ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 1298901853e0SKris Buschelman ierr = 
PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 1299901853e0SKris Buschelman ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1300901853e0SKris Buschelman ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1301725b52f3SLisandro Dalcin ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 13022d61bbb3SSatish Balay PetscFunctionReturn(0); 13032d61bbb3SSatish Balay } 13042d61bbb3SSatish Balay 13054a2ae208SSatish Balay #undef __FUNCT__ 13064a2ae208SSatish Balay #define __FUNCT__ "MatSetOption_SeqBAIJ" 13074e0d8c25SBarry Smith PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg) 13082d61bbb3SSatish Balay { 13092d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 131063ba0a88SBarry Smith PetscErrorCode ierr; 13112d61bbb3SSatish Balay 13122d61bbb3SSatish Balay PetscFunctionBegin; 1313aa275fccSKris Buschelman switch (op) { 1314aa275fccSKris Buschelman case MAT_ROW_ORIENTED: 13154e0d8c25SBarry Smith a->roworiented = flg; 1316aa275fccSKris Buschelman break; 1317a9817697SBarry Smith case MAT_KEEP_NONZERO_PATTERN: 1318a9817697SBarry Smith a->keepnonzeropattern = flg; 1319aa275fccSKris Buschelman break; 1320512a5fc5SBarry Smith case MAT_NEW_NONZERO_LOCATIONS: 1321512a5fc5SBarry Smith a->nonew = (flg ? 0 : 1); 1322aa275fccSKris Buschelman break; 1323aa275fccSKris Buschelman case MAT_NEW_NONZERO_LOCATION_ERR: 13244e0d8c25SBarry Smith a->nonew = (flg ? -1 : 0); 1325aa275fccSKris Buschelman break; 1326aa275fccSKris Buschelman case MAT_NEW_NONZERO_ALLOCATION_ERR: 13274e0d8c25SBarry Smith a->nonew = (flg ? -2 : 0); 1328aa275fccSKris Buschelman break; 132928b2fa4aSMatthew Knepley case MAT_UNUSED_NONZERO_LOCATION_ERR: 133028b2fa4aSMatthew Knepley a->nounused = (flg ? 
-1 : 0); 133128b2fa4aSMatthew Knepley break; 13324e0d8c25SBarry Smith case MAT_NEW_DIAGONALS: 1333aa275fccSKris Buschelman case MAT_IGNORE_OFF_PROC_ENTRIES: 1334aa275fccSKris Buschelman case MAT_USE_HASH_TABLE: 1335290bbb0aSBarry Smith ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1336aa275fccSKris Buschelman break; 133777e54ba9SKris Buschelman case MAT_SYMMETRIC: 133877e54ba9SKris Buschelman case MAT_STRUCTURALLY_SYMMETRIC: 13399a4540c5SBarry Smith case MAT_HERMITIAN: 13409a4540c5SBarry Smith case MAT_SYMMETRY_ETERNAL: 1341290bbb0aSBarry Smith ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 134277e54ba9SKris Buschelman break; 1343aa275fccSKris Buschelman default: 1344ad86a440SBarry Smith SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 13452d61bbb3SSatish Balay } 13462d61bbb3SSatish Balay PetscFunctionReturn(0); 13472d61bbb3SSatish Balay } 13482d61bbb3SSatish Balay 13494a2ae208SSatish Balay #undef __FUNCT__ 13504a2ae208SSatish Balay #define __FUNCT__ "MatGetRow_SeqBAIJ" 1351c1ac3661SBarry Smith PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 13522d61bbb3SSatish Balay { 13532d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 13546849ba73SBarry Smith PetscErrorCode ierr; 1355c1ac3661SBarry Smith PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 13563f1db9ecSBarry Smith MatScalar *aa,*aa_i; 135787828ca2SBarry Smith PetscScalar *v_i; 13582d61bbb3SSatish Balay 13592d61bbb3SSatish Balay PetscFunctionBegin; 1360d0f46423SBarry Smith bs = A->rmap->bs; 13612d61bbb3SSatish Balay ai = a->i; 13622d61bbb3SSatish Balay aj = a->j; 13632d61bbb3SSatish Balay aa = a->a; 13642d61bbb3SSatish Balay bs2 = a->bs2; 13652d61bbb3SSatish Balay 1366d0f46423SBarry Smith if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 13672d61bbb3SSatish Balay 13682d61bbb3SSatish Balay bn = row/bs; /* Block number */ 13692d61bbb3SSatish Balay bp = row % 
bs; /* Block Position */ 13702d61bbb3SSatish Balay M = ai[bn+1] - ai[bn]; 13712d61bbb3SSatish Balay *nz = bs*M; 13722d61bbb3SSatish Balay 13732d61bbb3SSatish Balay if (v) { 13742d61bbb3SSatish Balay *v = 0; 13752d61bbb3SSatish Balay if (*nz) { 137687828ca2SBarry Smith ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 13772d61bbb3SSatish Balay for (i=0; i<M; i++) { /* for each block in the block row */ 13782d61bbb3SSatish Balay v_i = *v + i*bs; 13792d61bbb3SSatish Balay aa_i = aa + bs2*(ai[bn] + i); 13802d61bbb3SSatish Balay for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 13812d61bbb3SSatish Balay } 13822d61bbb3SSatish Balay } 13832d61bbb3SSatish Balay } 13842d61bbb3SSatish Balay 13852d61bbb3SSatish Balay if (idx) { 13862d61bbb3SSatish Balay *idx = 0; 13872d61bbb3SSatish Balay if (*nz) { 1388c1ac3661SBarry Smith ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr); 13892d61bbb3SSatish Balay for (i=0; i<M; i++) { /* for each block in the block row */ 13902d61bbb3SSatish Balay idx_i = *idx + i*bs; 13912d61bbb3SSatish Balay itmp = bs*aj[ai[bn] + i]; 13922d61bbb3SSatish Balay for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 13932d61bbb3SSatish Balay } 13942d61bbb3SSatish Balay } 13952d61bbb3SSatish Balay } 13962d61bbb3SSatish Balay PetscFunctionReturn(0); 13972d61bbb3SSatish Balay } 13982d61bbb3SSatish Balay 13994a2ae208SSatish Balay #undef __FUNCT__ 14004a2ae208SSatish Balay #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1401c1ac3661SBarry Smith PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 14022d61bbb3SSatish Balay { 1403dfbe8321SBarry Smith PetscErrorCode ierr; 1404606d414cSSatish Balay 14052d61bbb3SSatish Balay PetscFunctionBegin; 140605b42c5fSBarry Smith if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 140705b42c5fSBarry Smith if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 14082d61bbb3SSatish Balay PetscFunctionReturn(0); 14092d61bbb3SSatish Balay } 14102d61bbb3SSatish Balay 14114a2ae208SSatish 
Balay #undef __FUNCT__ 14124a2ae208SSatish Balay #define __FUNCT__ "MatTranspose_SeqBAIJ" 1413fc4dec0aSBarry Smith PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 14142d61bbb3SSatish Balay { 14152d61bbb3SSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 14162d61bbb3SSatish Balay Mat C; 14176849ba73SBarry Smith PetscErrorCode ierr; 1418d0f46423SBarry Smith PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1419c1ac3661SBarry Smith PetscInt *rows,*cols,bs2=a->bs2; 1420dd6ea824SBarry Smith MatScalar *array; 14212d61bbb3SSatish Balay 14222d61bbb3SSatish Balay PetscFunctionBegin; 1423e9695a30SBarry Smith if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1424fc4dec0aSBarry Smith if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1425c1ac3661SBarry Smith ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr); 1426c1ac3661SBarry Smith ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr); 14272d61bbb3SSatish Balay 14282d61bbb3SSatish Balay for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 14297adad957SLisandro Dalcin ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 1430d0f46423SBarry Smith ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 14317adad957SLisandro Dalcin ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1432ab93d7beSBarry Smith ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr); 1433606d414cSSatish Balay ierr = PetscFree(col);CHKERRQ(ierr); 1434fc4dec0aSBarry Smith } else { 1435fc4dec0aSBarry Smith C = *B; 1436fc4dec0aSBarry Smith } 1437fc4dec0aSBarry Smith 1438fc4dec0aSBarry Smith array = a->a; 1439c1ac3661SBarry Smith ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr); 14402d61bbb3SSatish Balay cols = rows + bs; 14412d61bbb3SSatish Balay for (i=0; i<mbs; i++) { 14422d61bbb3SSatish Balay cols[0] = i*bs; 14432d61bbb3SSatish Balay for (k=1; k<bs; 
k++) cols[k] = cols[k-1] + 1; 14442d61bbb3SSatish Balay len = ai[i+1] - ai[i]; 14452d61bbb3SSatish Balay for (j=0; j<len; j++) { 14462d61bbb3SSatish Balay rows[0] = (*aj++)*bs; 14472d61bbb3SSatish Balay for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 14482d61bbb3SSatish Balay ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 14492d61bbb3SSatish Balay array += bs2; 14502d61bbb3SSatish Balay } 14512d61bbb3SSatish Balay } 1452606d414cSSatish Balay ierr = PetscFree(rows);CHKERRQ(ierr); 14532d61bbb3SSatish Balay 14542d61bbb3SSatish Balay ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 14552d61bbb3SSatish Balay ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 14562d61bbb3SSatish Balay 1457815cbec1SBarry Smith if (reuse == MAT_INITIAL_MATRIX || *B != A) { 14582d61bbb3SSatish Balay *B = C; 14592d61bbb3SSatish Balay } else { 1460273d9f13SBarry Smith ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 14612d61bbb3SSatish Balay } 14622d61bbb3SSatish Balay PetscFunctionReturn(0); 14632d61bbb3SSatish Balay } 14642d61bbb3SSatish Balay 14654a2ae208SSatish Balay #undef __FUNCT__ 14664a2ae208SSatish Balay #define __FUNCT__ "MatView_SeqBAIJ_Binary" 14676849ba73SBarry Smith static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 14682593348eSBarry Smith { 1469b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 14706849ba73SBarry Smith PetscErrorCode ierr; 1471d0f46423SBarry Smith PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1472b24ad042SBarry Smith int fd; 147387828ca2SBarry Smith PetscScalar *aa; 1474ce6f0cecSBarry Smith FILE *file; 14752593348eSBarry Smith 14763a40ed3dSBarry Smith PetscFunctionBegin; 1477b0a32e0cSBarry Smith ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1478d0f46423SBarry Smith ierr = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr); 1479552e946dSBarry Smith col_lens[0] = MAT_FILE_COOKIE; 14803b2fbd54SBarry Smith 1481d0f46423SBarry Smith col_lens[1] 
= A->rmap->N; 1482d0f46423SBarry Smith col_lens[2] = A->cmap->n; 14837e67e3f9SSatish Balay col_lens[3] = a->nz*bs2; 14842593348eSBarry Smith 14852593348eSBarry Smith /* store lengths of each row and write (including header) to file */ 1486b6490206SBarry Smith count = 0; 1487b6490206SBarry Smith for (i=0; i<a->mbs; i++) { 1488b6490206SBarry Smith for (j=0; j<bs; j++) { 1489b6490206SBarry Smith col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1490b6490206SBarry Smith } 14912593348eSBarry Smith } 1492d0f46423SBarry Smith ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1493606d414cSSatish Balay ierr = PetscFree(col_lens);CHKERRQ(ierr); 14942593348eSBarry Smith 14952593348eSBarry Smith /* store column indices (zero start index) */ 1496c1ac3661SBarry Smith ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1497b6490206SBarry Smith count = 0; 1498b6490206SBarry Smith for (i=0; i<a->mbs; i++) { 1499b6490206SBarry Smith for (j=0; j<bs; j++) { 1500b6490206SBarry Smith for (k=a->i[i]; k<a->i[i+1]; k++) { 1501b6490206SBarry Smith for (l=0; l<bs; l++) { 1502b6490206SBarry Smith jj[count++] = bs*a->j[k] + l; 15032593348eSBarry Smith } 15042593348eSBarry Smith } 1505b6490206SBarry Smith } 1506b6490206SBarry Smith } 15076f69ff64SBarry Smith ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1508606d414cSSatish Balay ierr = PetscFree(jj);CHKERRQ(ierr); 15092593348eSBarry Smith 15102593348eSBarry Smith /* store nonzero values */ 151187828ca2SBarry Smith ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1512b6490206SBarry Smith count = 0; 1513b6490206SBarry Smith for (i=0; i<a->mbs; i++) { 1514b6490206SBarry Smith for (j=0; j<bs; j++) { 1515b6490206SBarry Smith for (k=a->i[i]; k<a->i[i+1]; k++) { 1516b6490206SBarry Smith for (l=0; l<bs; l++) { 15177e67e3f9SSatish Balay aa[count++] = a->a[bs2*k + l*bs + j]; 1518b6490206SBarry Smith } 1519b6490206SBarry Smith } 1520b6490206SBarry 
Smith } 1521b6490206SBarry Smith } 15226f69ff64SBarry Smith ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1523606d414cSSatish Balay ierr = PetscFree(aa);CHKERRQ(ierr); 1524ce6f0cecSBarry Smith 1525b0a32e0cSBarry Smith ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1526ce6f0cecSBarry Smith if (file) { 1527d0f46423SBarry Smith fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1528ce6f0cecSBarry Smith } 15293a40ed3dSBarry Smith PetscFunctionReturn(0); 15302593348eSBarry Smith } 15312593348eSBarry Smith 15324a2ae208SSatish Balay #undef __FUNCT__ 15334a2ae208SSatish Balay #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 15346849ba73SBarry Smith static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 15352593348eSBarry Smith { 1536b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1537dfbe8321SBarry Smith PetscErrorCode ierr; 1538d0f46423SBarry Smith PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1539f3ef73ceSBarry Smith PetscViewerFormat format; 15402593348eSBarry Smith 15413a40ed3dSBarry Smith PetscFunctionBegin; 1542b0a32e0cSBarry Smith ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1543456192e2SBarry Smith if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 154477431f27SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1545fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1546bcd9e38bSBarry Smith Mat aij; 1547ceb03754SKris Buschelman ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1548bcd9e38bSBarry Smith ierr = MatView(aij,viewer);CHKERRQ(ierr); 1549bcd9e38bSBarry Smith ierr = MatDestroy(aij);CHKERRQ(ierr); 155004929863SHong Zhang } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 155104929863SHong Zhang PetscFunctionReturn(0); 1552fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1553b0a32e0cSBarry Smith ierr = 
PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 155444cd7ae7SLois Curfman McInnes for (i=0; i<a->mbs; i++) { 155544cd7ae7SLois Curfman McInnes for (j=0; j<bs; j++) { 155677431f27SBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 155744cd7ae7SLois Curfman McInnes for (k=a->i[i]; k<a->i[i+1]; k++) { 155844cd7ae7SLois Curfman McInnes for (l=0; l<bs; l++) { 1559aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 15600e6d2581SBarry Smith if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1561a83599f4SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l, 15620e6d2581SBarry Smith PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 15630e6d2581SBarry Smith } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1564a83599f4SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l, 15650e6d2581SBarry Smith PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 15660e6d2581SBarry Smith } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1567a83599f4SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 15680ef38995SBarry Smith } 156944cd7ae7SLois Curfman McInnes #else 15700ef38995SBarry Smith if (a->a[bs2*k + l*bs + j] != 0.0) { 1571a83599f4SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 15720ef38995SBarry Smith } 157344cd7ae7SLois Curfman McInnes #endif 157444cd7ae7SLois Curfman McInnes } 157544cd7ae7SLois Curfman McInnes } 1576b0a32e0cSBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 157744cd7ae7SLois Curfman McInnes } 157844cd7ae7SLois Curfman McInnes } 1579b0a32e0cSBarry Smith ierr = 
PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 15800ef38995SBarry Smith } else { 1581b0a32e0cSBarry Smith ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 1582b6490206SBarry Smith for (i=0; i<a->mbs; i++) { 1583b6490206SBarry Smith for (j=0; j<bs; j++) { 158477431f27SBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1585b6490206SBarry Smith for (k=a->i[i]; k<a->i[i+1]; k++) { 1586b6490206SBarry Smith for (l=0; l<bs; l++) { 1587aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 15880e6d2581SBarry Smith if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1589a83599f4SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 15900e6d2581SBarry Smith PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 15910e6d2581SBarry Smith } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1592a83599f4SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 15930e6d2581SBarry Smith PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 15940ef38995SBarry Smith } else { 1595a83599f4SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 159688685aaeSLois Curfman McInnes } 159788685aaeSLois Curfman McInnes #else 1598a83599f4SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 159988685aaeSLois Curfman McInnes #endif 16002593348eSBarry Smith } 16012593348eSBarry Smith } 1602b0a32e0cSBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 16032593348eSBarry Smith } 16042593348eSBarry Smith } 1605b0a32e0cSBarry Smith ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 1606b6490206SBarry Smith } 1607b0a32e0cSBarry Smith ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 16083a40ed3dSBarry Smith PetscFunctionReturn(0); 
16092593348eSBarry Smith } 16102593348eSBarry Smith 16114a2ae208SSatish Balay #undef __FUNCT__ 16124a2ae208SSatish Balay #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 16136849ba73SBarry Smith static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 16143270192aSSatish Balay { 161577ed5343SBarry Smith Mat A = (Mat) Aa; 16163270192aSSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 16176849ba73SBarry Smith PetscErrorCode ierr; 1618d0f46423SBarry Smith PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 16190e6d2581SBarry Smith PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 16203f1db9ecSBarry Smith MatScalar *aa; 1621b0a32e0cSBarry Smith PetscViewer viewer; 16223270192aSSatish Balay 16233a40ed3dSBarry Smith PetscFunctionBegin; 16243270192aSSatish Balay 1625b65db4caSBarry Smith /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 162677ed5343SBarry Smith ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 162777ed5343SBarry Smith 1628b0a32e0cSBarry Smith ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 162977ed5343SBarry Smith 16303270192aSSatish Balay /* loop over matrix elements drawing boxes */ 1631b0a32e0cSBarry Smith color = PETSC_DRAW_BLUE; 16323270192aSSatish Balay for (i=0,row=0; i<mbs; i++,row+=bs) { 16333270192aSSatish Balay for (j=a->i[i]; j<a->i[i+1]; j++) { 1634d0f46423SBarry Smith y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 16353270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 16363270192aSSatish Balay aa = a->a + j*bs2; 16373270192aSSatish Balay for (k=0; k<bs; k++) { 16383270192aSSatish Balay for (l=0; l<bs; l++) { 16390e6d2581SBarry Smith if (PetscRealPart(*aa++) >= 0.) 
continue; 1640b0a32e0cSBarry Smith ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 16413270192aSSatish Balay } 16423270192aSSatish Balay } 16433270192aSSatish Balay } 16443270192aSSatish Balay } 1645b0a32e0cSBarry Smith color = PETSC_DRAW_CYAN; 16463270192aSSatish Balay for (i=0,row=0; i<mbs; i++,row+=bs) { 16473270192aSSatish Balay for (j=a->i[i]; j<a->i[i+1]; j++) { 1648d0f46423SBarry Smith y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 16493270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 16503270192aSSatish Balay aa = a->a + j*bs2; 16513270192aSSatish Balay for (k=0; k<bs; k++) { 16523270192aSSatish Balay for (l=0; l<bs; l++) { 16530e6d2581SBarry Smith if (PetscRealPart(*aa++) != 0.) continue; 1654b0a32e0cSBarry Smith ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 16553270192aSSatish Balay } 16563270192aSSatish Balay } 16573270192aSSatish Balay } 16583270192aSSatish Balay } 16593270192aSSatish Balay 1660b0a32e0cSBarry Smith color = PETSC_DRAW_RED; 16613270192aSSatish Balay for (i=0,row=0; i<mbs; i++,row+=bs) { 16623270192aSSatish Balay for (j=a->i[i]; j<a->i[i+1]; j++) { 1663d0f46423SBarry Smith y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 16643270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 16653270192aSSatish Balay aa = a->a + j*bs2; 16663270192aSSatish Balay for (k=0; k<bs; k++) { 16673270192aSSatish Balay for (l=0; l<bs; l++) { 16680e6d2581SBarry Smith if (PetscRealPart(*aa++) <= 0.) 
continue; 1669b0a32e0cSBarry Smith ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 16703270192aSSatish Balay } 16713270192aSSatish Balay } 16723270192aSSatish Balay } 16733270192aSSatish Balay } 167477ed5343SBarry Smith PetscFunctionReturn(0); 167577ed5343SBarry Smith } 16763270192aSSatish Balay 16774a2ae208SSatish Balay #undef __FUNCT__ 16784a2ae208SSatish Balay #define __FUNCT__ "MatView_SeqBAIJ_Draw" 16796849ba73SBarry Smith static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 168077ed5343SBarry Smith { 1681dfbe8321SBarry Smith PetscErrorCode ierr; 16820e6d2581SBarry Smith PetscReal xl,yl,xr,yr,w,h; 1683b0a32e0cSBarry Smith PetscDraw draw; 168477ed5343SBarry Smith PetscTruth isnull; 16853270192aSSatish Balay 168677ed5343SBarry Smith PetscFunctionBegin; 168777ed5343SBarry Smith 1688b0a32e0cSBarry Smith ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1689b0a32e0cSBarry Smith ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 169077ed5343SBarry Smith 169177ed5343SBarry Smith ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1692d0f46423SBarry Smith xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 169377ed5343SBarry Smith xr += w; yr += h; xl = -w; yl = -h; 1694b0a32e0cSBarry Smith ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1695b0a32e0cSBarry Smith ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 169677ed5343SBarry Smith ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 16973a40ed3dSBarry Smith PetscFunctionReturn(0); 16983270192aSSatish Balay } 16993270192aSSatish Balay 17004a2ae208SSatish Balay #undef __FUNCT__ 17014a2ae208SSatish Balay #define __FUNCT__ "MatView_SeqBAIJ" 1702dfbe8321SBarry Smith PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 17032593348eSBarry Smith { 1704dfbe8321SBarry Smith PetscErrorCode ierr; 
170532077d6dSBarry Smith PetscTruth iascii,isbinary,isdraw; 17062593348eSBarry Smith 17073a40ed3dSBarry Smith PetscFunctionBegin; 170832077d6dSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 1709fb9695e5SSatish Balay ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 1710fb9695e5SSatish Balay ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 171132077d6dSBarry Smith if (iascii){ 17123a40ed3dSBarry Smith ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 17130f5bd95cSBarry Smith } else if (isbinary) { 17143a40ed3dSBarry Smith ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 17150f5bd95cSBarry Smith } else if (isdraw) { 17163a40ed3dSBarry Smith ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 17175cd90555SBarry Smith } else { 1718a5e6ed63SBarry Smith Mat B; 1719ceb03754SKris Buschelman ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1720a5e6ed63SBarry Smith ierr = MatView(B,viewer);CHKERRQ(ierr); 1721a5e6ed63SBarry Smith ierr = MatDestroy(B);CHKERRQ(ierr); 17222593348eSBarry Smith } 17233a40ed3dSBarry Smith PetscFunctionReturn(0); 17242593348eSBarry Smith } 1725b6490206SBarry Smith 1726cd0e1443SSatish Balay 17274a2ae208SSatish Balay #undef __FUNCT__ 17284a2ae208SSatish Balay #define __FUNCT__ "MatGetValues_SeqBAIJ" 1729c1ac3661SBarry Smith PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1730cd0e1443SSatish Balay { 1731cd0e1443SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1732c1ac3661SBarry Smith PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1733c1ac3661SBarry Smith PetscInt *ai = a->i,*ailen = a->ilen; 1734d0f46423SBarry Smith PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 173597e567efSBarry Smith MatScalar *ap,*aa = a->a; 1736cd0e1443SSatish Balay 17373a40ed3dSBarry Smith PetscFunctionBegin; 17382d61bbb3SSatish 
Balay for (k=0; k<m; k++) { /* loop over rows */ 1739cd0e1443SSatish Balay row = im[k]; brow = row/bs; 174097e567efSBarry Smith if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1741d0f46423SBarry Smith if (row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 17422d61bbb3SSatish Balay rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 17432c3acbe9SBarry Smith nrow = ailen[brow]; 17442d61bbb3SSatish Balay for (l=0; l<n; l++) { /* loop over columns */ 174597e567efSBarry Smith if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1746d0f46423SBarry Smith if (in[l] >= A->cmap->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 17472d61bbb3SSatish Balay col = in[l] ; 17482d61bbb3SSatish Balay bcol = col/bs; 17492d61bbb3SSatish Balay cidx = col%bs; 17502d61bbb3SSatish Balay ridx = row%bs; 17512d61bbb3SSatish Balay high = nrow; 17522d61bbb3SSatish Balay low = 0; /* assume unsorted */ 17532d61bbb3SSatish Balay while (high-low > 5) { 1754cd0e1443SSatish Balay t = (low+high)/2; 1755cd0e1443SSatish Balay if (rp[t] > bcol) high = t; 1756cd0e1443SSatish Balay else low = t; 1757cd0e1443SSatish Balay } 1758cd0e1443SSatish Balay for (i=low; i<high; i++) { 1759cd0e1443SSatish Balay if (rp[i] > bcol) break; 1760cd0e1443SSatish Balay if (rp[i] == bcol) { 17612d61bbb3SSatish Balay *v++ = ap[bs2*i+bs*cidx+ridx]; 17622d61bbb3SSatish Balay goto finished; 1763cd0e1443SSatish Balay } 1764cd0e1443SSatish Balay } 176597e567efSBarry Smith *v++ = 0.0; 17662d61bbb3SSatish Balay finished:; 1767cd0e1443SSatish Balay } 1768cd0e1443SSatish Balay } 17693a40ed3dSBarry Smith PetscFunctionReturn(0); 1770cd0e1443SSatish Balay } 1771cd0e1443SSatish Balay 177297e5c40aSBarry Smith #define CHUNKSIZE 10 17734a2ae208SSatish Balay #undef __FUNCT__ 17744a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1775dd6ea824SBarry Smith PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat 
A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 177692c4ed94SBarry Smith { 177792c4ed94SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1778e2ee6c50SBarry Smith PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1779c1ac3661SBarry Smith PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 17806849ba73SBarry Smith PetscErrorCode ierr; 1781d0f46423SBarry Smith PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1782273d9f13SBarry Smith PetscTruth roworiented=a->roworiented; 1783dd6ea824SBarry Smith const PetscScalar *value = v; 1784f15d580aSBarry Smith MatScalar *ap,*aa = a->a,*bap; 178592c4ed94SBarry Smith 17863a40ed3dSBarry Smith PetscFunctionBegin; 17870e324ae4SSatish Balay if (roworiented) { 17880e324ae4SSatish Balay stepval = (n-1)*bs; 17890e324ae4SSatish Balay } else { 17900e324ae4SSatish Balay stepval = (m-1)*bs; 17910e324ae4SSatish Balay } 179292c4ed94SBarry Smith for (k=0; k<m; k++) { /* loop over added rows */ 179392c4ed94SBarry Smith row = im[k]; 17945ef9f2a5SBarry Smith if (row < 0) continue; 17952515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 179677431f27SBarry Smith if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 179792c4ed94SBarry Smith #endif 179892c4ed94SBarry Smith rp = aj + ai[row]; 179992c4ed94SBarry Smith ap = aa + bs2*ai[row]; 180092c4ed94SBarry Smith rmax = imax[row]; 180192c4ed94SBarry Smith nrow = ailen[row]; 180292c4ed94SBarry Smith low = 0; 1803c71e6ed7SBarry Smith high = nrow; 180492c4ed94SBarry Smith for (l=0; l<n; l++) { /* loop over added columns */ 18055ef9f2a5SBarry Smith if (in[l] < 0) continue; 18062515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 180777431f27SBarry Smith if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1); 180892c4ed94SBarry Smith #endif 180992c4ed94SBarry Smith col = in[l]; 181092c4ed94SBarry Smith if (roworiented) { 
18110e324ae4SSatish Balay value = v + k*(stepval+bs)*bs + l*bs; 18120e324ae4SSatish Balay } else { 18130e324ae4SSatish Balay value = v + l*(stepval+bs)*bs + k*bs; 181492c4ed94SBarry Smith } 18157cd84e04SBarry Smith if (col <= lastcol) low = 0; else high = nrow; 1816e2ee6c50SBarry Smith lastcol = col; 181792c4ed94SBarry Smith while (high-low > 7) { 181892c4ed94SBarry Smith t = (low+high)/2; 181992c4ed94SBarry Smith if (rp[t] > col) high = t; 182092c4ed94SBarry Smith else low = t; 182192c4ed94SBarry Smith } 182292c4ed94SBarry Smith for (i=low; i<high; i++) { 182392c4ed94SBarry Smith if (rp[i] > col) break; 182492c4ed94SBarry Smith if (rp[i] == col) { 18258a84c255SSatish Balay bap = ap + bs2*i; 18260e324ae4SSatish Balay if (roworiented) { 18278a84c255SSatish Balay if (is == ADD_VALUES) { 1828dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 1829dd9472c6SBarry Smith for (jj=ii; jj<bs2; jj+=bs) { 18308a84c255SSatish Balay bap[jj] += *value++; 1831dd9472c6SBarry Smith } 1832dd9472c6SBarry Smith } 18330e324ae4SSatish Balay } else { 1834dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 1835dd9472c6SBarry Smith for (jj=ii; jj<bs2; jj+=bs) { 18360e324ae4SSatish Balay bap[jj] = *value++; 18378a84c255SSatish Balay } 1838dd9472c6SBarry Smith } 1839dd9472c6SBarry Smith } 18400e324ae4SSatish Balay } else { 18410e324ae4SSatish Balay if (is == ADD_VALUES) { 1842dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 1843dd9472c6SBarry Smith for (jj=0; jj<bs; jj++) { 18440e324ae4SSatish Balay *bap++ += *value++; 1845dd9472c6SBarry Smith } 1846dd9472c6SBarry Smith } 18470e324ae4SSatish Balay } else { 1848dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 1849dd9472c6SBarry Smith for (jj=0; jj<bs; jj++) { 18500e324ae4SSatish Balay *bap++ = *value++; 18510e324ae4SSatish Balay } 18528a84c255SSatish Balay } 1853dd9472c6SBarry Smith } 1854dd9472c6SBarry Smith } 1855f1241b54SBarry Smith goto noinsert2; 185692c4ed94SBarry Smith } 185792c4ed94SBarry 
Smith } 185889280ab3SLois Curfman McInnes if (nonew == 1) goto noinsert2; 1859085a36d4SBarry Smith if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 1860421e10b8SBarry Smith MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1861c03d1d03SSatish Balay N = nrow++ - 1; high++; 186292c4ed94SBarry Smith /* shift up all the later entries in this row */ 186392c4ed94SBarry Smith for (ii=N; ii>=i; ii--) { 186492c4ed94SBarry Smith rp[ii+1] = rp[ii]; 1865549d3d68SSatish Balay ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 186692c4ed94SBarry Smith } 1867549d3d68SSatish Balay if (N >= i) { 1868549d3d68SSatish Balay ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1869549d3d68SSatish Balay } 187092c4ed94SBarry Smith rp[i] = col; 18718a84c255SSatish Balay bap = ap + bs2*i; 18720e324ae4SSatish Balay if (roworiented) { 1873dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 1874dd9472c6SBarry Smith for (jj=ii; jj<bs2; jj+=bs) { 18750e324ae4SSatish Balay bap[jj] = *value++; 1876dd9472c6SBarry Smith } 1877dd9472c6SBarry Smith } 18780e324ae4SSatish Balay } else { 1879dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 1880dd9472c6SBarry Smith for (jj=0; jj<bs; jj++) { 18810e324ae4SSatish Balay *bap++ = *value++; 18820e324ae4SSatish Balay } 1883dd9472c6SBarry Smith } 1884dd9472c6SBarry Smith } 1885f1241b54SBarry Smith noinsert2:; 188692c4ed94SBarry Smith low = i; 188792c4ed94SBarry Smith } 188892c4ed94SBarry Smith ailen[row] = nrow; 188992c4ed94SBarry Smith } 18903a40ed3dSBarry Smith PetscFunctionReturn(0); 189192c4ed94SBarry Smith } 189226e093fcSHong Zhang 18934a2ae208SSatish Balay #undef __FUNCT__ 18944a2ae208SSatish Balay #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1895dfbe8321SBarry Smith PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1896584200bdSSatish Balay { 1897584200bdSSatish Balay 
Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1898c1ac3661SBarry Smith PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1899d0f46423SBarry Smith PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 19006849ba73SBarry Smith PetscErrorCode ierr; 1901c1ac3661SBarry Smith PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 19023f1db9ecSBarry Smith MatScalar *aa = a->a,*ap; 19033447b6efSHong Zhang PetscReal ratio=0.6; 1904584200bdSSatish Balay 19053a40ed3dSBarry Smith PetscFunctionBegin; 19063a40ed3dSBarry Smith if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1907584200bdSSatish Balay 190843ee02c3SBarry Smith if (m) rmax = ailen[0]; 1909584200bdSSatish Balay for (i=1; i<mbs; i++) { 1910584200bdSSatish Balay /* move each row back by the amount of empty slots (fshift) before it*/ 1911584200bdSSatish Balay fshift += imax[i-1] - ailen[i-1]; 1912d402145bSBarry Smith rmax = PetscMax(rmax,ailen[i]); 1913584200bdSSatish Balay if (fshift) { 1914a7c10996SSatish Balay ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1915584200bdSSatish Balay N = ailen[i]; 1916584200bdSSatish Balay for (j=0; j<N; j++) { 1917584200bdSSatish Balay ip[j-fshift] = ip[j]; 1918549d3d68SSatish Balay ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1919584200bdSSatish Balay } 1920584200bdSSatish Balay } 1921584200bdSSatish Balay ai[i] = ai[i-1] + ailen[i-1]; 1922584200bdSSatish Balay } 1923584200bdSSatish Balay if (mbs) { 1924584200bdSSatish Balay fshift += imax[mbs-1] - ailen[mbs-1]; 1925584200bdSSatish Balay ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1926584200bdSSatish Balay } 1927584200bdSSatish Balay /* reset ilen and imax for each row */ 1928584200bdSSatish Balay for (i=0; i<mbs; i++) { 1929584200bdSSatish Balay ailen[i] = imax[i] = ai[i+1] - ai[i]; 1930584200bdSSatish Balay } 1931a7c10996SSatish Balay a->nz = ai[mbs]; 1932584200bdSSatish Balay 1933584200bdSSatish Balay /* diagonals may have moved, so kill the diagonal pointers */ 1934b01c7715SBarry Smith a->idiagvalid = 
PETSC_FALSE; 1935584200bdSSatish Balay if (fshift && a->diag) { 1936606d414cSSatish Balay ierr = PetscFree(a->diag);CHKERRQ(ierr); 193752e6d16bSBarry Smith ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1938584200bdSSatish Balay a->diag = 0; 1939584200bdSSatish Balay } 194028b2fa4aSMatthew Knepley if (fshift && a->nounused == -1) { 194128b2fa4aSMatthew Knepley SETERRQ4(PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 194228b2fa4aSMatthew Knepley } 1943d0f46423SBarry Smith ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1944ae15b995SBarry Smith ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1945ae15b995SBarry Smith ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1946e2f3b5e9SSatish Balay a->reallocs = 0; 19470e6d2581SBarry Smith A->info.nz_unneeded = (PetscReal)fshift*bs2; 1948cf4441caSHong Zhang 1949cb5d8e9eSHong Zhang /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */ 19502f53aa61SHong Zhang if (a->compressedrow.use){ 1951317fbc4cSHong Zhang ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 195273e7a558SHong Zhang } 195388e51ccdSHong Zhang 195488e51ccdSHong Zhang A->same_nonzero = PETSC_TRUE; 19553a40ed3dSBarry Smith PetscFunctionReturn(0); 1956584200bdSSatish Balay } 1957584200bdSSatish Balay 1958bea157c4SSatish Balay /* 1959bea157c4SSatish Balay This function returns an array of flags which indicate the locations of contiguous 1960bea157c4SSatish Balay blocks that should be zeroed. 
for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1961bea157c4SSatish Balay then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1962bea157c4SSatish Balay Assume: sizes should be long enough to hold all the values. 1963bea157c4SSatish Balay */ 19644a2ae208SSatish Balay #undef __FUNCT__ 19654a2ae208SSatish Balay #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1966c1ac3661SBarry Smith static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1967d9b7c43dSSatish Balay { 1968c1ac3661SBarry Smith PetscInt i,j,k,row; 1969bea157c4SSatish Balay PetscTruth flg; 19703a40ed3dSBarry Smith 1971433994e6SBarry Smith PetscFunctionBegin; 1972bea157c4SSatish Balay for (i=0,j=0; i<n; j++) { 1973bea157c4SSatish Balay row = idx[i]; 1974bea157c4SSatish Balay if (row%bs!=0) { /* Not the begining of a block */ 1975bea157c4SSatish Balay sizes[j] = 1; 1976bea157c4SSatish Balay i++; 1977e4fda26cSSatish Balay } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1978bea157c4SSatish Balay sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1979bea157c4SSatish Balay i++; 1980bea157c4SSatish Balay } else { /* Begining of the block, so check if the complete block exists */ 1981bea157c4SSatish Balay flg = PETSC_TRUE; 1982bea157c4SSatish Balay for (k=1; k<bs; k++) { 1983bea157c4SSatish Balay if (row+k != idx[i+k]) { /* break in the block */ 1984bea157c4SSatish Balay flg = PETSC_FALSE; 1985bea157c4SSatish Balay break; 1986d9b7c43dSSatish Balay } 1987bea157c4SSatish Balay } 1988abc0a331SBarry Smith if (flg) { /* No break in the bs */ 1989bea157c4SSatish Balay sizes[j] = bs; 1990bea157c4SSatish Balay i+= bs; 1991bea157c4SSatish Balay } else { 1992bea157c4SSatish Balay sizes[j] = 1; 1993bea157c4SSatish Balay i++; 1994bea157c4SSatish Balay } 1995bea157c4SSatish Balay } 1996bea157c4SSatish Balay } 1997bea157c4SSatish Balay *bs_max = j; 19983a40ed3dSBarry 
Smith PetscFunctionReturn(0); 1999d9b7c43dSSatish Balay } 2000d9b7c43dSSatish Balay 20014a2ae208SSatish Balay #undef __FUNCT__ 20024a2ae208SSatish Balay #define __FUNCT__ "MatZeroRows_SeqBAIJ" 2003f4df32b1SMatthew Knepley PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag) 2004d9b7c43dSSatish Balay { 2005d9b7c43dSSatish Balay Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2006dfbe8321SBarry Smith PetscErrorCode ierr; 2007f4df32b1SMatthew Knepley PetscInt i,j,k,count,*rows; 2008d0f46423SBarry Smith PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 200987828ca2SBarry Smith PetscScalar zero = 0.0; 20103f1db9ecSBarry Smith MatScalar *aa; 2011d9b7c43dSSatish Balay 20123a40ed3dSBarry Smith PetscFunctionBegin; 2013d9b7c43dSSatish Balay /* Make a copy of the IS and sort it */ 2014bea157c4SSatish Balay /* allocate memory for rows,sizes */ 2015c1ac3661SBarry Smith ierr = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 2016bea157c4SSatish Balay sizes = rows + is_n; 2017bea157c4SSatish Balay 2018563b5814SBarry Smith /* copy IS values to rows, and sort them */ 2019bea157c4SSatish Balay for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 2020bea157c4SSatish Balay ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2021a9817697SBarry Smith if (baij->keepnonzeropattern) { 2022dffd3267SBarry Smith for (i=0; i<is_n; i++) { sizes[i] = 1; } 2023dffd3267SBarry Smith bs_max = is_n; 202488e51ccdSHong Zhang A->same_nonzero = PETSC_TRUE; 2025dffd3267SBarry Smith } else { 2026bea157c4SSatish Balay ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 202788e51ccdSHong Zhang A->same_nonzero = PETSC_FALSE; 2028dffd3267SBarry Smith } 2029bea157c4SSatish Balay 2030bea157c4SSatish Balay for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2031bea157c4SSatish Balay row = rows[j]; 2032d0f46423SBarry Smith if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2033bea157c4SSatish Balay count 
= (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2034b31fbe3bSSatish Balay aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2035a9817697SBarry Smith if (sizes[i] == bs && !baij->keepnonzeropattern) { 2036f4df32b1SMatthew Knepley if (diag != 0.0) { 2037bea157c4SSatish Balay if (baij->ilen[row/bs] > 0) { 2038bea157c4SSatish Balay baij->ilen[row/bs] = 1; 2039bea157c4SSatish Balay baij->j[baij->i[row/bs]] = row/bs; 2040bea157c4SSatish Balay ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2041a07cd24cSSatish Balay } 2042563b5814SBarry Smith /* Now insert all the diagonal values for this bs */ 2043bea157c4SSatish Balay for (k=0; k<bs; k++) { 2044f4df32b1SMatthew Knepley ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2045bea157c4SSatish Balay } 2046f4df32b1SMatthew Knepley } else { /* (diag == 0.0) */ 2047bea157c4SSatish Balay baij->ilen[row/bs] = 0; 2048f4df32b1SMatthew Knepley } /* end (diag == 0.0) */ 2049bea157c4SSatish Balay } else { /* (sizes[i] != bs) */ 2050aa482453SBarry Smith #if defined (PETSC_USE_DEBUG) 2051634064b4SBarry Smith if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. 
Value should be 1"); 2052bea157c4SSatish Balay #endif 2053bea157c4SSatish Balay for (k=0; k<count; k++) { 2054d9b7c43dSSatish Balay aa[0] = zero; 2055d9b7c43dSSatish Balay aa += bs; 2056d9b7c43dSSatish Balay } 2057f4df32b1SMatthew Knepley if (diag != 0.0) { 2058f4df32b1SMatthew Knepley ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2059d9b7c43dSSatish Balay } 2060d9b7c43dSSatish Balay } 2061bea157c4SSatish Balay } 2062bea157c4SSatish Balay 2063606d414cSSatish Balay ierr = PetscFree(rows);CHKERRQ(ierr); 20649a8dea36SBarry Smith ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 20653a40ed3dSBarry Smith PetscFunctionReturn(0); 2066d9b7c43dSSatish Balay } 20671c351548SSatish Balay 20684a2ae208SSatish Balay #undef __FUNCT__ 20694a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_SeqBAIJ" 2070c1ac3661SBarry Smith PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 20712d61bbb3SSatish Balay { 20722d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2073e2ee6c50SBarry Smith PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2074c1ac3661SBarry Smith PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2075d0f46423SBarry Smith PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 20766849ba73SBarry Smith PetscErrorCode ierr; 2077c1ac3661SBarry Smith PetscInt ridx,cidx,bs2=a->bs2; 2078273d9f13SBarry Smith PetscTruth roworiented=a->roworiented; 20793f1db9ecSBarry Smith MatScalar *ap,value,*aa=a->a,*bap; 20802d61bbb3SSatish Balay 20812d61bbb3SSatish Balay PetscFunctionBegin; 20822d61bbb3SSatish Balay for (k=0; k<m; k++) { /* loop over added rows */ 2083085a36d4SBarry Smith row = im[k]; 2084085a36d4SBarry Smith brow = row/bs; 20855ef9f2a5SBarry Smith if (row < 0) continue; 20862515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 2087d0f46423SBarry Smith if (row >= A->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row 
too large: row %D max %D",row,A->rmap->N-1); 20882d61bbb3SSatish Balay #endif 20892d61bbb3SSatish Balay rp = aj + ai[brow]; 20902d61bbb3SSatish Balay ap = aa + bs2*ai[brow]; 20912d61bbb3SSatish Balay rmax = imax[brow]; 20922d61bbb3SSatish Balay nrow = ailen[brow]; 20932d61bbb3SSatish Balay low = 0; 2094c71e6ed7SBarry Smith high = nrow; 20952d61bbb3SSatish Balay for (l=0; l<n; l++) { /* loop over added columns */ 20965ef9f2a5SBarry Smith if (in[l] < 0) continue; 20972515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 2098d0f46423SBarry Smith if (in[l] >= A->cmap->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 20992d61bbb3SSatish Balay #endif 21002d61bbb3SSatish Balay col = in[l]; bcol = col/bs; 21012d61bbb3SSatish Balay ridx = row % bs; cidx = col % bs; 21022d61bbb3SSatish Balay if (roworiented) { 21035ef9f2a5SBarry Smith value = v[l + k*n]; 21042d61bbb3SSatish Balay } else { 21052d61bbb3SSatish Balay value = v[k + l*m]; 21062d61bbb3SSatish Balay } 21077cd84e04SBarry Smith if (col <= lastcol) low = 0; else high = nrow; 2108e2ee6c50SBarry Smith lastcol = col; 21092d61bbb3SSatish Balay while (high-low > 7) { 21102d61bbb3SSatish Balay t = (low+high)/2; 21112d61bbb3SSatish Balay if (rp[t] > bcol) high = t; 21122d61bbb3SSatish Balay else low = t; 21132d61bbb3SSatish Balay } 21142d61bbb3SSatish Balay for (i=low; i<high; i++) { 21152d61bbb3SSatish Balay if (rp[i] > bcol) break; 21162d61bbb3SSatish Balay if (rp[i] == bcol) { 21172d61bbb3SSatish Balay bap = ap + bs2*i + bs*cidx + ridx; 21182d61bbb3SSatish Balay if (is == ADD_VALUES) *bap += value; 21192d61bbb3SSatish Balay else *bap = value; 21202d61bbb3SSatish Balay goto noinsert1; 21212d61bbb3SSatish Balay } 21222d61bbb3SSatish Balay } 21232d61bbb3SSatish Balay if (nonew == 1) goto noinsert1; 2124085a36d4SBarry Smith if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2125421e10b8SBarry Smith 
MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2126c03d1d03SSatish Balay N = nrow++ - 1; high++; 21272d61bbb3SSatish Balay /* shift up all the later entries in this row */ 21282d61bbb3SSatish Balay for (ii=N; ii>=i; ii--) { 21292d61bbb3SSatish Balay rp[ii+1] = rp[ii]; 2130549d3d68SSatish Balay ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 21312d61bbb3SSatish Balay } 2132549d3d68SSatish Balay if (N>=i) { 2133549d3d68SSatish Balay ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2134549d3d68SSatish Balay } 21352d61bbb3SSatish Balay rp[i] = bcol; 21362d61bbb3SSatish Balay ap[bs2*i + bs*cidx + ridx] = value; 2137085a36d4SBarry Smith a->nz++; 21382d61bbb3SSatish Balay noinsert1:; 21392d61bbb3SSatish Balay low = i; 21402d61bbb3SSatish Balay } 21412d61bbb3SSatish Balay ailen[brow] = nrow; 21422d61bbb3SSatish Balay } 214388e51ccdSHong Zhang A->same_nonzero = PETSC_FALSE; 21442d61bbb3SSatish Balay PetscFunctionReturn(0); 21452d61bbb3SSatish Balay } 21462d61bbb3SSatish Balay 2147db4efbfdSBarry Smith EXTERN PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat,PetscTruth); 21482d61bbb3SSatish Balay 21494a2ae208SSatish Balay #undef __FUNCT__ 21504a2ae208SSatish Balay #define __FUNCT__ "MatILUFactor_SeqBAIJ" 21510481f469SBarry Smith PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 21522d61bbb3SSatish Balay { 21532d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 21542d61bbb3SSatish Balay Mat outA; 2155dfbe8321SBarry Smith PetscErrorCode ierr; 2156667159a5SBarry Smith PetscTruth row_identity,col_identity; 21572d61bbb3SSatish Balay 21582d61bbb3SSatish Balay PetscFunctionBegin; 2159d3d32019SBarry Smith if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2160667159a5SBarry Smith ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2161667159a5SBarry Smith ierr = 
ISIdentity(col,&col_identity);CHKERRQ(ierr); 2162667159a5SBarry Smith if (!row_identity || !col_identity) { 2163634064b4SBarry Smith SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2164667159a5SBarry Smith } 21652d61bbb3SSatish Balay 21662d61bbb3SSatish Balay outA = inA; 21675c9eb25fSBarry Smith inA->factor = MAT_FACTOR_LU; 21682d61bbb3SSatish Balay 2169c4992f7dSBarry Smith ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2170cf242676SKris Buschelman 2171c38d4ed2SBarry Smith ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2172c3122656SLisandro Dalcin if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); } 2173c3122656SLisandro Dalcin a->row = row; 2174c38d4ed2SBarry Smith ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2175c3122656SLisandro Dalcin if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); } 2176c3122656SLisandro Dalcin a->col = col; 2177c38d4ed2SBarry Smith 2178c38d4ed2SBarry Smith /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 21794c49b128SBarry Smith ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 218052e6d16bSBarry Smith ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 2181c38d4ed2SBarry Smith 218258e7c4b3SSatish Balay ierr = MatSeqBAIJSetNumericFactorization(inA,(PetscTruth)(row_identity && col_identity));CHKERRQ(ierr); 2183c38d4ed2SBarry Smith if (!a->solve_work) { 2184d0f46423SBarry Smith ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 2185d0f46423SBarry Smith ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2186c38d4ed2SBarry Smith } 2187719d5645SBarry Smith ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2188667159a5SBarry Smith 21892d61bbb3SSatish Balay PetscFunctionReturn(0); 21902d61bbb3SSatish Balay } 2191d9b7c43dSSatish Balay 2192fb2e594dSBarry Smith EXTERN_C_BEGIN 21934a2ae208SSatish Balay #undef 
__FUNCT__ 21944a2ae208SSatish Balay #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2195be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 219627a8da17SBarry Smith { 219727a8da17SBarry Smith Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2198bdb1c0e1SJed Brown PetscInt i,nz,mbs; 219927a8da17SBarry Smith 220027a8da17SBarry Smith PetscFunctionBegin; 220114db4f74SSatish Balay nz = baij->maxnz/baij->bs2; 2202bdb1c0e1SJed Brown mbs = baij->mbs; 220327a8da17SBarry Smith for (i=0; i<nz; i++) { 220427a8da17SBarry Smith baij->j[i] = indices[i]; 220527a8da17SBarry Smith } 220627a8da17SBarry Smith baij->nz = nz; 2207bdb1c0e1SJed Brown for (i=0; i<mbs; i++) { 220827a8da17SBarry Smith baij->ilen[i] = baij->imax[i]; 220927a8da17SBarry Smith } 221027a8da17SBarry Smith PetscFunctionReturn(0); 221127a8da17SBarry Smith } 2212fb2e594dSBarry Smith EXTERN_C_END 221327a8da17SBarry Smith 22144a2ae208SSatish Balay #undef __FUNCT__ 22154a2ae208SSatish Balay #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 221627a8da17SBarry Smith /*@ 221727a8da17SBarry Smith MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 221827a8da17SBarry Smith in the matrix. 221927a8da17SBarry Smith 222027a8da17SBarry Smith Input Parameters: 222127a8da17SBarry Smith + mat - the SeqBAIJ matrix 222227a8da17SBarry Smith - indices - the column indices 222327a8da17SBarry Smith 222415091d37SBarry Smith Level: advanced 222515091d37SBarry Smith 222627a8da17SBarry Smith Notes: 222727a8da17SBarry Smith This can be called if you have precomputed the nonzero structure of the 222827a8da17SBarry Smith matrix and want to provide it to the matrix object to improve the performance 222927a8da17SBarry Smith of the MatSetValues() operation. 
223027a8da17SBarry Smith 223127a8da17SBarry Smith You MUST have set the correct numbers of nonzeros per row in the call to 2232d1be2dadSMatthew Knepley MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 223327a8da17SBarry Smith 223427a8da17SBarry Smith MUST be called before any calls to MatSetValues(); 223527a8da17SBarry Smith 223627a8da17SBarry Smith @*/ 2237be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 223827a8da17SBarry Smith { 2239c1ac3661SBarry Smith PetscErrorCode ierr,(*f)(Mat,PetscInt *); 224027a8da17SBarry Smith 224127a8da17SBarry Smith PetscFunctionBegin; 22424482741eSBarry Smith PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 22434482741eSBarry Smith PetscValidPointer(indices,2); 2244c134de8dSSatish Balay ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 224527a8da17SBarry Smith if (f) { 224627a8da17SBarry Smith ierr = (*f)(mat,indices);CHKERRQ(ierr); 224727a8da17SBarry Smith } else { 2248634064b4SBarry Smith SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices"); 224927a8da17SBarry Smith } 225027a8da17SBarry Smith PetscFunctionReturn(0); 225127a8da17SBarry Smith } 225227a8da17SBarry Smith 22534a2ae208SSatish Balay #undef __FUNCT__ 2254985db425SBarry Smith #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2255985db425SBarry Smith PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2256273d9f13SBarry Smith { 2257273d9f13SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2258dfbe8321SBarry Smith PetscErrorCode ierr; 2259c1ac3661SBarry Smith PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2260273d9f13SBarry Smith PetscReal atmp; 226187828ca2SBarry Smith PetscScalar *x,zero = 0.0; 2262273d9f13SBarry Smith MatScalar *aa; 2263c1ac3661SBarry Smith PetscInt ncols,brow,krow,kcol; 2264273d9f13SBarry Smith 2265273d9f13SBarry Smith PetscFunctionBegin; 2266273d9f13SBarry Smith if (A->factor) 
SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2267d0f46423SBarry Smith bs = A->rmap->bs; 2268273d9f13SBarry Smith aa = a->a; 2269273d9f13SBarry Smith ai = a->i; 2270273d9f13SBarry Smith aj = a->j; 2271273d9f13SBarry Smith mbs = a->mbs; 2272273d9f13SBarry Smith 22732dcb1b2aSMatthew Knepley ierr = VecSet(v,zero);CHKERRQ(ierr); 22741ebc52fbSHong Zhang ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2275273d9f13SBarry Smith ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2276d0f46423SBarry Smith if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2277273d9f13SBarry Smith for (i=0; i<mbs; i++) { 2278273d9f13SBarry Smith ncols = ai[1] - ai[0]; ai++; 2279273d9f13SBarry Smith brow = bs*i; 2280273d9f13SBarry Smith for (j=0; j<ncols; j++){ 2281273d9f13SBarry Smith for (kcol=0; kcol<bs; kcol++){ 2282273d9f13SBarry Smith for (krow=0; krow<bs; krow++){ 2283273d9f13SBarry Smith atmp = PetscAbsScalar(*aa);aa++; 2284273d9f13SBarry Smith row = brow + krow; /* row index */ 2285a83599f4SBarry Smith /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */ 2286985db425SBarry Smith if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2287273d9f13SBarry Smith } 2288273d9f13SBarry Smith } 2289273d9f13SBarry Smith aj++; 2290273d9f13SBarry Smith } 2291273d9f13SBarry Smith } 22921ebc52fbSHong Zhang ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2293273d9f13SBarry Smith PetscFunctionReturn(0); 2294273d9f13SBarry Smith } 2295273d9f13SBarry Smith 22964a2ae208SSatish Balay #undef __FUNCT__ 22973c896bc6SHong Zhang #define __FUNCT__ "MatCopy_SeqBAIJ" 22983c896bc6SHong Zhang PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 22993c896bc6SHong Zhang { 23003c896bc6SHong Zhang PetscErrorCode ierr; 23013c896bc6SHong Zhang 23023c896bc6SHong Zhang PetscFunctionBegin; 23033c896bc6SHong Zhang /* If the two matrices have the same copy implementation, use fast copy. 
*/ 23043c896bc6SHong Zhang if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 23053c896bc6SHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 23063c896bc6SHong Zhang Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 23073c896bc6SHong Zhang 2308d0f46423SBarry Smith if (a->i[A->rmap->N] != b->i[B->rmap->N]) { 23093c896bc6SHong Zhang SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 23103c896bc6SHong Zhang } 2311d0f46423SBarry Smith ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap->N])*sizeof(PetscScalar));CHKERRQ(ierr); 23123c896bc6SHong Zhang } else { 23133c896bc6SHong Zhang ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 23143c896bc6SHong Zhang } 23153c896bc6SHong Zhang PetscFunctionReturn(0); 23163c896bc6SHong Zhang } 23173c896bc6SHong Zhang 23183c896bc6SHong Zhang #undef __FUNCT__ 23194a2ae208SSatish Balay #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 2320dfbe8321SBarry Smith PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A) 2321273d9f13SBarry Smith { 2322dfbe8321SBarry Smith PetscErrorCode ierr; 2323273d9f13SBarry Smith 2324273d9f13SBarry Smith PetscFunctionBegin; 2325db4efbfdSBarry Smith ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 2326273d9f13SBarry Smith PetscFunctionReturn(0); 2327273d9f13SBarry Smith } 2328273d9f13SBarry Smith 23294a2ae208SSatish Balay #undef __FUNCT__ 23304a2ae208SSatish Balay #define __FUNCT__ "MatGetArray_SeqBAIJ" 2331dfbe8321SBarry Smith PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2332f2a5309cSSatish Balay { 2333f2a5309cSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2334f2a5309cSSatish Balay PetscFunctionBegin; 2335f2a5309cSSatish Balay *array = a->a; 2336f2a5309cSSatish Balay PetscFunctionReturn(0); 2337f2a5309cSSatish Balay } 2338f2a5309cSSatish Balay 23394a2ae208SSatish Balay #undef __FUNCT__ 23404a2ae208SSatish Balay #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 2341dfbe8321SBarry Smith PetscErrorCode 
MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2342f2a5309cSSatish Balay { 2343f2a5309cSSatish Balay PetscFunctionBegin; 2344f2a5309cSSatish Balay PetscFunctionReturn(0); 2345f2a5309cSSatish Balay } 2346f2a5309cSSatish Balay 234742ee4b1aSHong Zhang #include "petscblaslapack.h" 234842ee4b1aSHong Zhang #undef __FUNCT__ 234942ee4b1aSHong Zhang #define __FUNCT__ "MatAXPY_SeqBAIJ" 2350f4df32b1SMatthew Knepley PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 235142ee4b1aSHong Zhang { 235242ee4b1aSHong Zhang Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 2353dfbe8321SBarry Smith PetscErrorCode ierr; 2354d0f46423SBarry Smith PetscInt i,bs=Y->rmap->bs,j,bs2; 23550805154bSBarry Smith PetscBLASInt one=1,bnz = PetscBLASIntCast(x->nz); 235642ee4b1aSHong Zhang 235742ee4b1aSHong Zhang PetscFunctionBegin; 235842ee4b1aSHong Zhang if (str == SAME_NONZERO_PATTERN) { 2359f4df32b1SMatthew Knepley PetscScalar alpha = a; 2360f4df32b1SMatthew Knepley BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2361c537a176SHong Zhang } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2362c4319e64SHong Zhang if (y->xtoy && y->XtoY != X) { 2363c4319e64SHong Zhang ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2364c4319e64SHong Zhang ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 2365c537a176SHong Zhang } 2366c4319e64SHong Zhang if (!y->xtoy) { /* get xtoy */ 2367c4319e64SHong Zhang ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 2368c4319e64SHong Zhang y->XtoY = X; 2369c009d632SSatish Balay ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr); 2370c537a176SHong Zhang } 2371c4319e64SHong Zhang bs2 = bs*bs; 2372c537a176SHong Zhang for (i=0; i<x->nz; i++) { 2373c4319e64SHong Zhang j = 0; 2374c4319e64SHong Zhang while (j < bs2){ 2375f4df32b1SMatthew Knepley y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 2376c4319e64SHong Zhang j++; 2377c537a176SHong Zhang } 2378c4319e64SHong 
Zhang } 23791e2582c4SBarry Smith ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 238042ee4b1aSHong Zhang } else { 2381f4df32b1SMatthew Knepley ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 238242ee4b1aSHong Zhang } 238342ee4b1aSHong Zhang PetscFunctionReturn(0); 238442ee4b1aSHong Zhang } 238542ee4b1aSHong Zhang 238699cafbc1SBarry Smith #undef __FUNCT__ 238799cafbc1SBarry Smith #define __FUNCT__ "MatRealPart_SeqBAIJ" 238899cafbc1SBarry Smith PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 238999cafbc1SBarry Smith { 239099cafbc1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 239199cafbc1SBarry Smith PetscInt i,nz = a->bs2*a->i[a->mbs]; 2392dd6ea824SBarry Smith MatScalar *aa = a->a; 239399cafbc1SBarry Smith 239499cafbc1SBarry Smith PetscFunctionBegin; 239599cafbc1SBarry Smith for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 239699cafbc1SBarry Smith PetscFunctionReturn(0); 239799cafbc1SBarry Smith } 239899cafbc1SBarry Smith 239999cafbc1SBarry Smith #undef __FUNCT__ 240099cafbc1SBarry Smith #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 240199cafbc1SBarry Smith PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 240299cafbc1SBarry Smith { 240399cafbc1SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 240499cafbc1SBarry Smith PetscInt i,nz = a->bs2*a->i[a->mbs]; 2405dd6ea824SBarry Smith MatScalar *aa = a->a; 240699cafbc1SBarry Smith 240799cafbc1SBarry Smith PetscFunctionBegin; 240899cafbc1SBarry Smith for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 240999cafbc1SBarry Smith PetscFunctionReturn(0); 241099cafbc1SBarry Smith } 241199cafbc1SBarry Smith 24123acb8795SBarry Smith extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring); 24133acb8795SBarry Smith 24143acb8795SBarry Smith #undef __FUNCT__ 24153acb8795SBarry Smith #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ" 24163acb8795SBarry Smith /* 24173acb8795SBarry Smith Code almost idential to MatGetColumnIJ_SeqAIJ() 
should share common code 24183acb8795SBarry Smith */ 24193acb8795SBarry Smith PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 24203acb8795SBarry Smith { 24213acb8795SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 24223acb8795SBarry Smith PetscErrorCode ierr; 24233acb8795SBarry Smith PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 24243acb8795SBarry Smith PetscInt nz = a->i[m],row,*jj,mr,col; 24253acb8795SBarry Smith 24263acb8795SBarry Smith PetscFunctionBegin; 24273acb8795SBarry Smith *nn = n; 24283acb8795SBarry Smith if (!ia) PetscFunctionReturn(0); 24293acb8795SBarry Smith if (symmetric) { 24303acb8795SBarry Smith SETERRQ(PETSC_ERR_SUP,"Not for BAIJ matrices"); 24313acb8795SBarry Smith } else { 24323acb8795SBarry Smith ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr); 24333acb8795SBarry Smith ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 24343acb8795SBarry Smith ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr); 24353acb8795SBarry Smith ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr); 24363acb8795SBarry Smith jj = a->j; 24373acb8795SBarry Smith for (i=0; i<nz; i++) { 24383acb8795SBarry Smith collengths[jj[i]]++; 24393acb8795SBarry Smith } 24403acb8795SBarry Smith cia[0] = oshift; 24413acb8795SBarry Smith for (i=0; i<n; i++) { 24423acb8795SBarry Smith cia[i+1] = cia[i] + collengths[i]; 24433acb8795SBarry Smith } 24443acb8795SBarry Smith ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 24453acb8795SBarry Smith jj = a->j; 24463acb8795SBarry Smith for (row=0; row<m; row++) { 24473acb8795SBarry Smith mr = a->i[row+1] - a->i[row]; 24483acb8795SBarry Smith for (i=0; i<mr; i++) { 24493acb8795SBarry Smith col = *jj++; 24503acb8795SBarry Smith cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 24513acb8795SBarry Smith } 
24523acb8795SBarry Smith } 24533acb8795SBarry Smith ierr = PetscFree(collengths);CHKERRQ(ierr); 24543acb8795SBarry Smith *ia = cia; *ja = cja; 24553acb8795SBarry Smith } 24563acb8795SBarry Smith PetscFunctionReturn(0); 24573acb8795SBarry Smith } 24583acb8795SBarry Smith 24593acb8795SBarry Smith #undef __FUNCT__ 24603acb8795SBarry Smith #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ" 24613acb8795SBarry Smith PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 24623acb8795SBarry Smith { 24633acb8795SBarry Smith PetscErrorCode ierr; 24643acb8795SBarry Smith 24653acb8795SBarry Smith PetscFunctionBegin; 24663acb8795SBarry Smith if (!ia) PetscFunctionReturn(0); 24673acb8795SBarry Smith ierr = PetscFree(*ia);CHKERRQ(ierr); 24683acb8795SBarry Smith ierr = PetscFree(*ja);CHKERRQ(ierr); 24693acb8795SBarry Smith PetscFunctionReturn(0); 24703acb8795SBarry Smith } 24713acb8795SBarry Smith 2472*f6d58c54SBarry Smith #undef __FUNCT__ 2473*f6d58c54SBarry Smith #define __FUNCT__ "MatFDColoringApply_BAIJ" 2474*f6d58c54SBarry Smith PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx) 2475*f6d58c54SBarry Smith { 2476*f6d58c54SBarry Smith PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f; 2477*f6d58c54SBarry Smith PetscErrorCode ierr; 2478*f6d58c54SBarry Smith PetscInt bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow,m1,m2; 2479*f6d58c54SBarry Smith PetscScalar dx,*y,*xx,*w3_array; 2480*f6d58c54SBarry Smith PetscScalar *vscale_array; 2481*f6d58c54SBarry Smith PetscReal epsilon = coloring->error_rel,umin = coloring->umin,unorm; 2482*f6d58c54SBarry Smith Vec w1=coloring->w1,w2=coloring->w2,w3; 2483*f6d58c54SBarry Smith void *fctx = coloring->fctx; 2484*f6d58c54SBarry Smith PetscTruth flg = PETSC_FALSE; 2485*f6d58c54SBarry Smith PetscInt 
ctype=coloring->ctype,N,col_start=0,col_end=0; 2486*f6d58c54SBarry Smith Vec x1_tmp; 2487*f6d58c54SBarry Smith 2488*f6d58c54SBarry Smith PetscFunctionBegin; 2489*f6d58c54SBarry Smith PetscValidHeaderSpecific(J,MAT_COOKIE,1); 2490*f6d58c54SBarry Smith PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_COOKIE,2); 2491*f6d58c54SBarry Smith PetscValidHeaderSpecific(x1,VEC_COOKIE,3); 2492*f6d58c54SBarry Smith if (!f) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()"); 2493*f6d58c54SBarry Smith 2494*f6d58c54SBarry Smith ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2495*f6d58c54SBarry Smith ierr = MatSetUnfactored(J);CHKERRQ(ierr); 2496*f6d58c54SBarry Smith ierr = PetscOptionsGetTruth(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr); 2497*f6d58c54SBarry Smith if (flg) { 2498*f6d58c54SBarry Smith ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr); 2499*f6d58c54SBarry Smith } else { 2500*f6d58c54SBarry Smith PetscTruth assembled; 2501*f6d58c54SBarry Smith ierr = MatAssembled(J,&assembled);CHKERRQ(ierr); 2502*f6d58c54SBarry Smith if (assembled) { 2503*f6d58c54SBarry Smith ierr = MatZeroEntries(J);CHKERRQ(ierr); 2504*f6d58c54SBarry Smith } 2505*f6d58c54SBarry Smith } 2506*f6d58c54SBarry Smith 2507*f6d58c54SBarry Smith x1_tmp = x1; 2508*f6d58c54SBarry Smith if (!coloring->vscale){ 2509*f6d58c54SBarry Smith ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr); 2510*f6d58c54SBarry Smith } 2511*f6d58c54SBarry Smith 2512*f6d58c54SBarry Smith /* 2513*f6d58c54SBarry Smith This is a horrible, horrible, hack. 
See DMMGComputeJacobian_Multigrid() it inproperly sets 2514*f6d58c54SBarry Smith coloring->F for the coarser grids from the finest 2515*f6d58c54SBarry Smith */ 2516*f6d58c54SBarry Smith if (coloring->F) { 2517*f6d58c54SBarry Smith ierr = VecGetLocalSize(coloring->F,&m1);CHKERRQ(ierr); 2518*f6d58c54SBarry Smith ierr = VecGetLocalSize(w1,&m2);CHKERRQ(ierr); 2519*f6d58c54SBarry Smith if (m1 != m2) { 2520*f6d58c54SBarry Smith coloring->F = 0; 2521*f6d58c54SBarry Smith } 2522*f6d58c54SBarry Smith } 2523*f6d58c54SBarry Smith 2524*f6d58c54SBarry Smith if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/ 2525*f6d58c54SBarry Smith ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr); 2526*f6d58c54SBarry Smith } 2527*f6d58c54SBarry Smith ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */ 2528*f6d58c54SBarry Smith 2529*f6d58c54SBarry Smith /* Set w1 = F(x1) */ 2530*f6d58c54SBarry Smith if (coloring->F) { 2531*f6d58c54SBarry Smith w1 = coloring->F; /* use already computed value of function */ 2532*f6d58c54SBarry Smith coloring->F = 0; 2533*f6d58c54SBarry Smith } else { 2534*f6d58c54SBarry Smith ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2535*f6d58c54SBarry Smith ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr); 2536*f6d58c54SBarry Smith ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2537*f6d58c54SBarry Smith } 2538*f6d58c54SBarry Smith 2539*f6d58c54SBarry Smith if (!coloring->w3) { 2540*f6d58c54SBarry Smith ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr); 2541*f6d58c54SBarry Smith ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr); 2542*f6d58c54SBarry Smith } 2543*f6d58c54SBarry Smith w3 = coloring->w3; 2544*f6d58c54SBarry Smith 2545*f6d58c54SBarry Smith CHKMEMQ; 2546*f6d58c54SBarry Smith /* Compute all the local scale factors, including ghost points */ 2547*f6d58c54SBarry Smith ierr = 
VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr); 2548*f6d58c54SBarry Smith ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr); 2549*f6d58c54SBarry Smith ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2550*f6d58c54SBarry Smith if (ctype == IS_COLORING_GHOSTED){ 2551*f6d58c54SBarry Smith col_start = 0; col_end = N; 2552*f6d58c54SBarry Smith } else if (ctype == IS_COLORING_GLOBAL){ 2553*f6d58c54SBarry Smith xx = xx - start; 2554*f6d58c54SBarry Smith vscale_array = vscale_array - start; 2555*f6d58c54SBarry Smith col_start = start; col_end = N + start; 2556*f6d58c54SBarry Smith } CHKMEMQ; 2557*f6d58c54SBarry Smith for (col=col_start; col<col_end; col++){ 2558*f6d58c54SBarry Smith /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */ 2559*f6d58c54SBarry Smith if (coloring->htype[0] == 'w') { 2560*f6d58c54SBarry Smith dx = 1.0 + unorm; 2561*f6d58c54SBarry Smith } else { 2562*f6d58c54SBarry Smith dx = xx[col]; 2563*f6d58c54SBarry Smith } 2564*f6d58c54SBarry Smith if (dx == 0.0) dx = 1.0; 2565*f6d58c54SBarry Smith #if !defined(PETSC_USE_COMPLEX) 2566*f6d58c54SBarry Smith if (dx < umin && dx >= 0.0) dx = umin; 2567*f6d58c54SBarry Smith else if (dx < 0.0 && dx > -umin) dx = -umin; 2568*f6d58c54SBarry Smith #else 2569*f6d58c54SBarry Smith if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2570*f6d58c54SBarry Smith else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2571*f6d58c54SBarry Smith #endif 2572*f6d58c54SBarry Smith dx *= epsilon; 2573*f6d58c54SBarry Smith vscale_array[col] = 1.0/dx; 2574*f6d58c54SBarry Smith } CHKMEMQ; 2575*f6d58c54SBarry Smith if (ctype == IS_COLORING_GLOBAL) vscale_array = vscale_array + start; 2576*f6d58c54SBarry Smith ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2577*f6d58c54SBarry Smith if (ctype == IS_COLORING_GLOBAL){ 2578*f6d58c54SBarry Smith ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2579*f6d58c54SBarry Smith 
ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2580*f6d58c54SBarry Smith } 2581*f6d58c54SBarry Smith CHKMEMQ; 2582*f6d58c54SBarry Smith if (coloring->vscaleforrow) { 2583*f6d58c54SBarry Smith vscaleforrow = coloring->vscaleforrow; 2584*f6d58c54SBarry Smith } else { 2585*f6d58c54SBarry Smith SETERRQ(PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow"); 2586*f6d58c54SBarry Smith } 2587*f6d58c54SBarry Smith 2588*f6d58c54SBarry Smith 2589*f6d58c54SBarry Smith ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr); 2590*f6d58c54SBarry Smith /* 2591*f6d58c54SBarry Smith Loop over each color 2592*f6d58c54SBarry Smith */ 2593*f6d58c54SBarry Smith ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2594*f6d58c54SBarry Smith for (k=0; k<coloring->ncolors; k++) { 2595*f6d58c54SBarry Smith coloring->currentcolor = k; 2596*f6d58c54SBarry Smith for (i=0; i<bs; i++) { 2597*f6d58c54SBarry Smith ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr); 2598*f6d58c54SBarry Smith ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr); 2599*f6d58c54SBarry Smith if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start; 2600*f6d58c54SBarry Smith /* 2601*f6d58c54SBarry Smith Loop over each column associated with color 2602*f6d58c54SBarry Smith adding the perturbation to the vector w3. 
2603*f6d58c54SBarry Smith */ 2604*f6d58c54SBarry Smith for (l=0; l<coloring->ncolumns[k]; l++) { 2605*f6d58c54SBarry Smith col = i + bs*coloring->columns[k][l]; /* local column of the matrix we are probing for */ 2606*f6d58c54SBarry Smith if (coloring->htype[0] == 'w') { 2607*f6d58c54SBarry Smith dx = 1.0 + unorm; 2608*f6d58c54SBarry Smith } else { 2609*f6d58c54SBarry Smith dx = xx[col]; 2610*f6d58c54SBarry Smith } 2611*f6d58c54SBarry Smith if (dx == 0.0) dx = 1.0; 2612*f6d58c54SBarry Smith #if !defined(PETSC_USE_COMPLEX) 2613*f6d58c54SBarry Smith if (dx < umin && dx >= 0.0) dx = umin; 2614*f6d58c54SBarry Smith else if (dx < 0.0 && dx > -umin) dx = -umin; 2615*f6d58c54SBarry Smith #else 2616*f6d58c54SBarry Smith if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0) dx = umin; 2617*f6d58c54SBarry Smith else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin; 2618*f6d58c54SBarry Smith #endif 2619*f6d58c54SBarry Smith dx *= epsilon; 2620*f6d58c54SBarry Smith if (!PetscAbsScalar(dx)) SETERRQ(PETSC_ERR_PLIB,"Computed 0 differencing parameter"); 2621*f6d58c54SBarry Smith w3_array[col] += dx; 2622*f6d58c54SBarry Smith } 2623*f6d58c54SBarry Smith if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start; 2624*f6d58c54SBarry Smith ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr); 2625*f6d58c54SBarry Smith 2626*f6d58c54SBarry Smith /* 2627*f6d58c54SBarry Smith Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 2628*f6d58c54SBarry Smith w2 = F(x1 + dx) - F(x1) 2629*f6d58c54SBarry Smith */ 2630*f6d58c54SBarry Smith ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2631*f6d58c54SBarry Smith ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr); 2632*f6d58c54SBarry Smith ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr); 2633*f6d58c54SBarry Smith ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr); 2634*f6d58c54SBarry Smith 2635*f6d58c54SBarry Smith /* 2636*f6d58c54SBarry Smith Loop over rows of vector, 
putting results into Jacobian matrix 2637*f6d58c54SBarry Smith */ 2638*f6d58c54SBarry Smith ierr = VecGetArray(w2,&y);CHKERRQ(ierr); 2639*f6d58c54SBarry Smith for (l=0; l<coloring->nrows[k]; l++) { 2640*f6d58c54SBarry Smith row = bs*coloring->rows[k][l]; /* local row index */ 2641*f6d58c54SBarry Smith col = i + bs*coloring->columnsforrow[k][l]; /* global column index */ 2642*f6d58c54SBarry Smith for (j=0; j<bs; j++) { 2643*f6d58c54SBarry Smith y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]]; 2644*f6d58c54SBarry Smith srows[j] = row + start + j; 2645*f6d58c54SBarry Smith } 2646*f6d58c54SBarry Smith ierr = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr); 2647*f6d58c54SBarry Smith } 2648*f6d58c54SBarry Smith ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr); 2649*f6d58c54SBarry Smith } 2650*f6d58c54SBarry Smith } /* endof for each color */ 2651*f6d58c54SBarry Smith if (ctype == IS_COLORING_GLOBAL) xx = xx + start; 2652*f6d58c54SBarry Smith ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr); 2653*f6d58c54SBarry Smith ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr); 2654*f6d58c54SBarry Smith ierr = PetscFree(srows);CHKERRQ(ierr); 2655*f6d58c54SBarry Smith 2656*f6d58c54SBarry Smith coloring->currentcolor = -1; 2657*f6d58c54SBarry Smith ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2658*f6d58c54SBarry Smith ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2659*f6d58c54SBarry Smith ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr); 2660*f6d58c54SBarry Smith PetscFunctionReturn(0); 2661*f6d58c54SBarry Smith } 266299cafbc1SBarry Smith 26632593348eSBarry Smith /* -------------------------------------------------------------------*/ 2664cc2dc46cSBarry Smith static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2665cc2dc46cSBarry Smith MatGetRow_SeqBAIJ, 2666cc2dc46cSBarry Smith MatRestoreRow_SeqBAIJ, 2667cc2dc46cSBarry Smith MatMult_SeqBAIJ_N, 266897304618SKris Buschelman /* 4*/ 
MatMultAdd_SeqBAIJ_N, 26697c922b88SBarry Smith MatMultTranspose_SeqBAIJ, 26707c922b88SBarry Smith MatMultTransposeAdd_SeqBAIJ, 2671db4efbfdSBarry Smith 0, 2672cc2dc46cSBarry Smith 0, 2673cc2dc46cSBarry Smith 0, 267497304618SKris Buschelman /*10*/ 0, 2675cc2dc46cSBarry Smith MatLUFactor_SeqBAIJ, 2676cc2dc46cSBarry Smith 0, 2677b6490206SBarry Smith 0, 2678f2501298SSatish Balay MatTranspose_SeqBAIJ, 267997304618SKris Buschelman /*15*/ MatGetInfo_SeqBAIJ, 2680cc2dc46cSBarry Smith MatEqual_SeqBAIJ, 2681cc2dc46cSBarry Smith MatGetDiagonal_SeqBAIJ, 2682cc2dc46cSBarry Smith MatDiagonalScale_SeqBAIJ, 2683cc2dc46cSBarry Smith MatNorm_SeqBAIJ, 268497304618SKris Buschelman /*20*/ 0, 2685cc2dc46cSBarry Smith MatAssemblyEnd_SeqBAIJ, 2686cc2dc46cSBarry Smith MatSetOption_SeqBAIJ, 2687cc2dc46cSBarry Smith MatZeroEntries_SeqBAIJ, 2688d519adbfSMatthew Knepley /*24*/ MatZeroRows_SeqBAIJ, 2689db4efbfdSBarry Smith 0, 2690db4efbfdSBarry Smith 0, 2691db4efbfdSBarry Smith 0, 2692db4efbfdSBarry Smith 0, 2693d519adbfSMatthew Knepley /*29*/ MatSetUpPreallocation_SeqBAIJ, 2694db4efbfdSBarry Smith 0, 2695db4efbfdSBarry Smith 0, 2696f2a5309cSSatish Balay MatGetArray_SeqBAIJ, 2697f2a5309cSSatish Balay MatRestoreArray_SeqBAIJ, 2698d519adbfSMatthew Knepley /*34*/ MatDuplicate_SeqBAIJ, 2699cc2dc46cSBarry Smith 0, 2700cc2dc46cSBarry Smith 0, 2701cc2dc46cSBarry Smith MatILUFactor_SeqBAIJ, 2702cc2dc46cSBarry Smith 0, 2703d519adbfSMatthew Knepley /*39*/ MatAXPY_SeqBAIJ, 2704cc2dc46cSBarry Smith MatGetSubMatrices_SeqBAIJ, 2705cc2dc46cSBarry Smith MatIncreaseOverlap_SeqBAIJ, 2706cc2dc46cSBarry Smith MatGetValues_SeqBAIJ, 27073c896bc6SHong Zhang MatCopy_SeqBAIJ, 2708d519adbfSMatthew Knepley /*44*/ 0, 2709cc2dc46cSBarry Smith MatScale_SeqBAIJ, 2710cc2dc46cSBarry Smith 0, 2711cc2dc46cSBarry Smith 0, 2712c8342467SHong Zhang MatILUDTFactor_SeqBAIJ, 2713d519adbfSMatthew Knepley /*49*/ 0, 27143b2fbd54SBarry Smith MatGetRowIJ_SeqBAIJ, 271592c4ed94SBarry Smith MatRestoreRowIJ_SeqBAIJ, 27163acb8795SBarry Smith 
MatGetColumnIJ_SeqBAIJ, 27173acb8795SBarry Smith MatRestoreColumnIJ_SeqBAIJ, 27183acb8795SBarry Smith /*54*/ MatFDColoringCreate_SeqAIJ, 2719cc2dc46cSBarry Smith 0, 2720cc2dc46cSBarry Smith 0, 2721cc2dc46cSBarry Smith 0, 2722d3825aa8SBarry Smith MatSetValuesBlocked_SeqBAIJ, 2723d519adbfSMatthew Knepley /*59*/ MatGetSubMatrix_SeqBAIJ, 2724b9b97703SBarry Smith MatDestroy_SeqBAIJ, 2725b9b97703SBarry Smith MatView_SeqBAIJ, 2726357abbc8SBarry Smith 0, 2727273d9f13SBarry Smith 0, 2728d519adbfSMatthew Knepley /*64*/ 0, 2729273d9f13SBarry Smith 0, 2730273d9f13SBarry Smith 0, 2731273d9f13SBarry Smith 0, 2732273d9f13SBarry Smith 0, 2733d519adbfSMatthew Knepley /*69*/ MatGetRowMaxAbs_SeqBAIJ, 2734273d9f13SBarry Smith 0, 2735c87e5d42SMatthew Knepley MatConvert_Basic, 273697304618SKris Buschelman 0, 273797304618SKris Buschelman 0, 2738d519adbfSMatthew Knepley /*74*/ 0, 2739*f6d58c54SBarry Smith MatFDColoringApply_BAIJ, 274097304618SKris Buschelman 0, 274197304618SKris Buschelman 0, 274297304618SKris Buschelman 0, 2743d519adbfSMatthew Knepley /*79*/ 0, 274497304618SKris Buschelman 0, 274597304618SKris Buschelman 0, 274697304618SKris Buschelman 0, 2747865e5f61SKris Buschelman MatLoad_SeqBAIJ, 2748d519adbfSMatthew Knepley /*84*/ 0, 2749b01c7715SBarry Smith 0, 2750b01c7715SBarry Smith 0, 2751b01c7715SBarry Smith 0, 2752865e5f61SKris Buschelman 0, 2753d519adbfSMatthew Knepley /*89*/ 0, 2754865e5f61SKris Buschelman 0, 2755865e5f61SKris Buschelman 0, 2756865e5f61SKris Buschelman 0, 2757865e5f61SKris Buschelman 0, 2758d519adbfSMatthew Knepley /*94*/ 0, 2759865e5f61SKris Buschelman 0, 2760865e5f61SKris Buschelman 0, 276199cafbc1SBarry Smith 0, 276299cafbc1SBarry Smith 0, 2763d519adbfSMatthew Knepley /*99*/0, 276499cafbc1SBarry Smith 0, 276599cafbc1SBarry Smith 0, 276699cafbc1SBarry Smith 0, 276799cafbc1SBarry Smith 0, 2768d519adbfSMatthew Knepley /*104*/0, 276999cafbc1SBarry Smith MatRealPart_SeqBAIJ, 27702af78befSBarry Smith MatImaginaryPart_SeqBAIJ, 27712af78befSBarry Smith 0, 
27722af78befSBarry Smith 0, 2773d519adbfSMatthew Knepley /*109*/0, 27742af78befSBarry Smith 0, 27752af78befSBarry Smith 0, 27762af78befSBarry Smith 0, 27772af78befSBarry Smith MatMissingDiagonal_SeqBAIJ 2778d519adbfSMatthew Knepley /*114*/ 277999cafbc1SBarry Smith }; 27802593348eSBarry Smith 27813e90b805SBarry Smith EXTERN_C_BEGIN 27824a2ae208SSatish Balay #undef __FUNCT__ 27834a2ae208SSatish Balay #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2784be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat) 27853e90b805SBarry Smith { 27863e90b805SBarry Smith Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 2787d0f46423SBarry Smith PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 2788dfbe8321SBarry Smith PetscErrorCode ierr; 27893e90b805SBarry Smith 27903e90b805SBarry Smith PetscFunctionBegin; 27913e90b805SBarry Smith if (aij->nonew != 1) { 2792512a5fc5SBarry Smith SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 27933e90b805SBarry Smith } 27943e90b805SBarry Smith 27953e90b805SBarry Smith /* allocate space for values if not already there */ 27963e90b805SBarry Smith if (!aij->saved_values) { 279787828ca2SBarry Smith ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 27983e90b805SBarry Smith } 27993e90b805SBarry Smith 28003e90b805SBarry Smith /* copy values over */ 280187828ca2SBarry Smith ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 28023e90b805SBarry Smith PetscFunctionReturn(0); 28033e90b805SBarry Smith } 28043e90b805SBarry Smith EXTERN_C_END 28053e90b805SBarry Smith 28063e90b805SBarry Smith EXTERN_C_BEGIN 28074a2ae208SSatish Balay #undef __FUNCT__ 28084a2ae208SSatish Balay #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2809be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat) 28103e90b805SBarry Smith { 28113e90b805SBarry Smith Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 
28126849ba73SBarry Smith PetscErrorCode ierr; 2813d0f46423SBarry Smith PetscInt nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2; 28143e90b805SBarry Smith 28153e90b805SBarry Smith PetscFunctionBegin; 28163e90b805SBarry Smith if (aij->nonew != 1) { 2817512a5fc5SBarry Smith SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 28183e90b805SBarry Smith } 28193e90b805SBarry Smith if (!aij->saved_values) { 2820634064b4SBarry Smith SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 28213e90b805SBarry Smith } 28223e90b805SBarry Smith 28233e90b805SBarry Smith /* copy values over */ 282487828ca2SBarry Smith ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 28253e90b805SBarry Smith PetscFunctionReturn(0); 28263e90b805SBarry Smith } 28273e90b805SBarry Smith EXTERN_C_END 28283e90b805SBarry Smith 2829273d9f13SBarry Smith EXTERN_C_BEGIN 2830f69a0ea3SMatthew Knepley extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2831f69a0ea3SMatthew Knepley extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2832273d9f13SBarry Smith EXTERN_C_END 2833273d9f13SBarry Smith 2834273d9f13SBarry Smith EXTERN_C_BEGIN 28354a2ae208SSatish Balay #undef __FUNCT__ 2836a23d5eceSKris Buschelman #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2837be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2838a23d5eceSKris Buschelman { 2839a23d5eceSKris Buschelman Mat_SeqBAIJ *b; 28406849ba73SBarry Smith PetscErrorCode ierr; 2841db4efbfdSBarry Smith PetscInt i,mbs,nbs,bs2,newbs = PetscAbs(bs); 2842ab93d7beSBarry Smith PetscTruth flg,skipallocation = PETSC_FALSE; 2843a23d5eceSKris Buschelman 2844a23d5eceSKris Buschelman PetscFunctionBegin; 2845a23d5eceSKris Buschelman 2846ab93d7beSBarry Smith if (nz == MAT_SKIP_ALLOCATION) { 2847ab93d7beSBarry Smith 
skipallocation = PETSC_TRUE; 2848ab93d7beSBarry Smith nz = 0; 2849ab93d7beSBarry Smith } 28508c07d4e3SBarry Smith 2851db4efbfdSBarry Smith if (bs < 0) { 28527adad957SLisandro Dalcin ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr); 2853db4efbfdSBarry Smith ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr); 28548c07d4e3SBarry Smith ierr = PetscOptionsEnd();CHKERRQ(ierr); 2855db4efbfdSBarry Smith bs = PetscAbs(bs); 2856db4efbfdSBarry Smith } 2857a23d5eceSKris Buschelman if (nnz && newbs != bs) { 2858634064b4SBarry Smith SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz"); 2859a23d5eceSKris Buschelman } 2860a23d5eceSKris Buschelman bs = newbs; 2861a23d5eceSKris Buschelman 28627408324eSLisandro Dalcin ierr = PetscMapSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 28637408324eSLisandro Dalcin ierr = PetscMapSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2864d0f46423SBarry Smith ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr); 2865d0f46423SBarry Smith ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr); 2866899cda47SBarry Smith 2867899cda47SBarry Smith B->preallocated = PETSC_TRUE; 2868899cda47SBarry Smith 2869d0f46423SBarry Smith mbs = B->rmap->n/bs; 2870d0f46423SBarry Smith nbs = B->cmap->n/bs; 2871a23d5eceSKris Buschelman bs2 = bs*bs; 2872a23d5eceSKris Buschelman 2873d0f46423SBarry Smith if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) { 2874d0f46423SBarry Smith SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2875a23d5eceSKris Buschelman } 2876a23d5eceSKris Buschelman 2877a23d5eceSKris Buschelman if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 287877431f27SBarry Smith if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2879a23d5eceSKris Buschelman if (nnz) { 
2880a23d5eceSKris Buschelman for (i=0; i<mbs; i++) { 288177431f27SBarry Smith if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 288277431f27SBarry Smith if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2883a23d5eceSKris Buschelman } 2884a23d5eceSKris Buschelman } 2885a23d5eceSKris Buschelman 2886a23d5eceSKris Buschelman b = (Mat_SeqBAIJ*)B->data; 28877adad957SLisandro Dalcin ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 28888c07d4e3SBarry Smith ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr); 28898c07d4e3SBarry Smith ierr = PetscOptionsEnd();CHKERRQ(ierr); 28908c07d4e3SBarry Smith 2891a23d5eceSKris Buschelman if (!flg) { 2892a23d5eceSKris Buschelman switch (bs) { 2893a23d5eceSKris Buschelman case 1: 2894a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_1; 2895a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_1; 28966d3beeddSMatthew Knepley B->ops->pbrelax = MatPBRelax_SeqBAIJ_1; 2897a23d5eceSKris Buschelman break; 2898a23d5eceSKris Buschelman case 2: 2899a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_2; 2900a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2901b01c7715SBarry Smith B->ops->pbrelax = MatPBRelax_SeqBAIJ_2; 2902a23d5eceSKris Buschelman break; 2903a23d5eceSKris Buschelman case 3: 2904a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_3; 2905a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2906b01c7715SBarry Smith B->ops->pbrelax = MatPBRelax_SeqBAIJ_3; 2907a23d5eceSKris Buschelman break; 2908a23d5eceSKris Buschelman case 4: 2909a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_4; 2910a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2911b01c7715SBarry 
Smith B->ops->pbrelax = MatPBRelax_SeqBAIJ_4; 2912a23d5eceSKris Buschelman break; 2913a23d5eceSKris Buschelman case 5: 2914a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_5; 2915a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2916b01c7715SBarry Smith B->ops->pbrelax = MatPBRelax_SeqBAIJ_5; 2917a23d5eceSKris Buschelman break; 2918a23d5eceSKris Buschelman case 6: 2919a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_6; 2920a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_6; 29216d3beeddSMatthew Knepley B->ops->pbrelax = MatPBRelax_SeqBAIJ_6; 2922a23d5eceSKris Buschelman break; 2923a23d5eceSKris Buschelman case 7: 2924a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_7; 2925a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_7; 29266d3beeddSMatthew Knepley B->ops->pbrelax = MatPBRelax_SeqBAIJ_7; 2927a23d5eceSKris Buschelman break; 2928a23d5eceSKris Buschelman default: 2929a23d5eceSKris Buschelman B->ops->mult = MatMult_SeqBAIJ_N; 2930a23d5eceSKris Buschelman B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2931a23d5eceSKris Buschelman break; 2932a23d5eceSKris Buschelman } 2933a23d5eceSKris Buschelman } 2934d0f46423SBarry Smith B->rmap->bs = bs; 2935a23d5eceSKris Buschelman b->mbs = mbs; 2936a23d5eceSKris Buschelman b->nbs = nbs; 2937ab93d7beSBarry Smith if (!skipallocation) { 29382ee49352SLisandro Dalcin if (!b->imax) { 2939ab93d7beSBarry Smith ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 29402ee49352SLisandro Dalcin } 2941ab93d7beSBarry Smith /* b->ilen will count nonzeros in each block row so far. 
*/ 2942ab93d7beSBarry Smith for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2943a23d5eceSKris Buschelman if (!nnz) { 2944a23d5eceSKris Buschelman if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2945a23d5eceSKris Buschelman else if (nz <= 0) nz = 1; 2946a23d5eceSKris Buschelman for (i=0; i<mbs; i++) b->imax[i] = nz; 2947a23d5eceSKris Buschelman nz = nz*mbs; 2948a23d5eceSKris Buschelman } else { 2949a23d5eceSKris Buschelman nz = 0; 2950a23d5eceSKris Buschelman for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2951a23d5eceSKris Buschelman } 2952a23d5eceSKris Buschelman 2953a23d5eceSKris Buschelman /* allocate the matrix space */ 29542ee49352SLisandro Dalcin ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2955d0f46423SBarry Smith ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr); 2956d0f46423SBarry Smith ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2957a23d5eceSKris Buschelman ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2958c1ac3661SBarry Smith ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2959a23d5eceSKris Buschelman b->singlemalloc = PETSC_TRUE; 2960a23d5eceSKris Buschelman b->i[0] = 0; 2961a23d5eceSKris Buschelman for (i=1; i<mbs+1; i++) { 2962a23d5eceSKris Buschelman b->i[i] = b->i[i-1] + b->imax[i-1]; 2963a23d5eceSKris Buschelman } 2964e6b907acSBarry Smith b->free_a = PETSC_TRUE; 2965e6b907acSBarry Smith b->free_ij = PETSC_TRUE; 2966e811da20SHong Zhang } else { 2967e6b907acSBarry Smith b->free_a = PETSC_FALSE; 2968e6b907acSBarry Smith b->free_ij = PETSC_FALSE; 2969ab93d7beSBarry Smith } 2970a23d5eceSKris Buschelman 2971d0f46423SBarry Smith B->rmap->bs = bs; 2972a23d5eceSKris Buschelman b->bs2 = bs2; 2973a23d5eceSKris Buschelman b->mbs = mbs; 2974a23d5eceSKris Buschelman b->nz = 0; 2975a23d5eceSKris Buschelman b->maxnz = nz*bs2; 2976a23d5eceSKris Buschelman 
B->info.nz_unneeded = (PetscReal)b->maxnz; 2977a23d5eceSKris Buschelman PetscFunctionReturn(0); 2978a23d5eceSKris Buschelman } 2979a23d5eceSKris Buschelman EXTERN_C_END 2980a23d5eceSKris Buschelman 2981b24902e0SBarry Smith EXTERN_C_BEGIN 2982725b52f3SLisandro Dalcin #undef __FUNCT__ 2983725b52f3SLisandro Dalcin #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 2984cf12db73SBarry Smith PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2985725b52f3SLisandro Dalcin { 2986725b52f3SLisandro Dalcin PetscInt i,m,nz,nz_max=0,*nnz; 2987725b52f3SLisandro Dalcin PetscScalar *values=0; 2988725b52f3SLisandro Dalcin PetscErrorCode ierr; 2989725b52f3SLisandro Dalcin 2990725b52f3SLisandro Dalcin PetscFunctionBegin; 2991725b52f3SLisandro Dalcin 2992725b52f3SLisandro Dalcin if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 29937408324eSLisandro Dalcin 29947408324eSLisandro Dalcin ierr = PetscMapSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 29957408324eSLisandro Dalcin ierr = PetscMapSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2996d0f46423SBarry Smith ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr); 2997d0f46423SBarry Smith ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr); 2998d0f46423SBarry Smith m = B->rmap->n/bs; 2999725b52f3SLisandro Dalcin 3000cf12db73SBarry Smith if (ii[0] != 0) { SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); } 3001725b52f3SLisandro Dalcin ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr); 3002725b52f3SLisandro Dalcin for(i=0; i<m; i++) { 3003cf12db73SBarry Smith nz = ii[i+1]- ii[i]; 3004725b52f3SLisandro Dalcin if (nz < 0) { SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); } 3005725b52f3SLisandro Dalcin nz_max = PetscMax(nz_max, nz); 3006725b52f3SLisandro Dalcin nnz[i] = nz; 3007725b52f3SLisandro Dalcin } 3008725b52f3SLisandro Dalcin ierr = 
MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 3009725b52f3SLisandro Dalcin ierr = PetscFree(nnz);CHKERRQ(ierr); 3010725b52f3SLisandro Dalcin 3011725b52f3SLisandro Dalcin values = (PetscScalar*)V; 3012725b52f3SLisandro Dalcin if (!values) { 3013725b52f3SLisandro Dalcin ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3014725b52f3SLisandro Dalcin ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3015725b52f3SLisandro Dalcin } 3016725b52f3SLisandro Dalcin for (i=0; i<m; i++) { 3017cf12db73SBarry Smith PetscInt ncols = ii[i+1] - ii[i]; 3018cf12db73SBarry Smith const PetscInt *icols = jj + ii[i]; 3019cf12db73SBarry Smith const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 3020725b52f3SLisandro Dalcin ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 3021725b52f3SLisandro Dalcin } 3022725b52f3SLisandro Dalcin if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 3023725b52f3SLisandro Dalcin ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3024725b52f3SLisandro Dalcin ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3025725b52f3SLisandro Dalcin 3026725b52f3SLisandro Dalcin PetscFunctionReturn(0); 3027725b52f3SLisandro Dalcin } 3028725b52f3SLisandro Dalcin EXTERN_C_END 3029725b52f3SLisandro Dalcin 3030725b52f3SLisandro Dalcin 3031207126cbSBarry Smith EXTERN_C_BEGIN 3032b24902e0SBarry Smith extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*); 3033db4efbfdSBarry Smith extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*); 3034b24902e0SBarry Smith EXTERN_C_END 3035b24902e0SBarry Smith 30360bad9183SKris Buschelman /*MC 3037fafad747SKris Buschelman MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 30380bad9183SKris Buschelman block sparse compressed row format. 
30390bad9183SKris Buschelman 30400bad9183SKris Buschelman Options Database Keys: 30410bad9183SKris Buschelman . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 30420bad9183SKris Buschelman 30430bad9183SKris Buschelman Level: beginner 30440bad9183SKris Buschelman 3045f0c06035SSatish Balay .seealso: MatCreateSeqBAIJ() 30460bad9183SKris Buschelman M*/ 30470bad9183SKris Buschelman 3048b24902e0SBarry Smith 3049a23d5eceSKris Buschelman EXTERN_C_BEGIN 3050a23d5eceSKris Buschelman #undef __FUNCT__ 30514a2ae208SSatish Balay #define __FUNCT__ "MatCreate_SeqBAIJ" 3052be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B) 30532593348eSBarry Smith { 3054dfbe8321SBarry Smith PetscErrorCode ierr; 3055c1ac3661SBarry Smith PetscMPIInt size; 3056b6490206SBarry Smith Mat_SeqBAIJ *b; 30573b2fbd54SBarry Smith 30583a40ed3dSBarry Smith PetscFunctionBegin; 30597adad957SLisandro Dalcin ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 306029bbc08cSBarry Smith if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3061b6490206SBarry Smith 306238f2d2fdSLisandro Dalcin ierr = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr); 3063b0a32e0cSBarry Smith B->data = (void*)b; 3064549d3d68SSatish Balay ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 306590f02eecSBarry Smith B->mapping = 0; 30662593348eSBarry Smith b->row = 0; 30672593348eSBarry Smith b->col = 0; 3068e51c0b9cSSatish Balay b->icol = 0; 30692593348eSBarry Smith b->reallocs = 0; 30703e90b805SBarry Smith b->saved_values = 0; 30712593348eSBarry Smith 3072c4992f7dSBarry Smith b->roworiented = PETSC_TRUE; 30732593348eSBarry Smith b->nonew = 0; 30742593348eSBarry Smith b->diag = 0; 30752593348eSBarry Smith b->solve_work = 0; 3076de6a44a3SBarry Smith b->mult_work = 0; 30772a1b7f2aSHong Zhang B->spptr = 0; 30780e6d2581SBarry Smith B->info.nz_unneeded = (PetscReal)b->maxnz; 3079a9817697SBarry Smith 
b->keepnonzeropattern = PETSC_FALSE; 3080c4319e64SHong Zhang b->xtoy = 0; 3081c4319e64SHong Zhang b->XtoY = 0; 308273e7a558SHong Zhang b->compressedrow.use = PETSC_FALSE; 308326e093fcSHong Zhang b->compressedrow.nrows = 0; 308473e7a558SHong Zhang b->compressedrow.i = PETSC_NULL; 308573e7a558SHong Zhang b->compressedrow.rindex = PETSC_NULL; 308673e7a558SHong Zhang b->compressedrow.checked = PETSC_FALSE; 308788e51ccdSHong Zhang B->same_nonzero = PETSC_FALSE; 30884e220ebcSLois Curfman McInnes 3089db4efbfdSBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_seqbaij_petsc_C", 3090db4efbfdSBarry Smith "MatGetFactorAvailable_seqbaij_petsc", 3091db4efbfdSBarry Smith MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr); 3092b24902e0SBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_seqbaij_petsc_C", 3093b24902e0SBarry Smith "MatGetFactor_seqbaij_petsc", 3094b24902e0SBarry Smith MatGetFactor_seqbaij_petsc);CHKERRQ(ierr); 309543516a2dSKris Buschelman ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C", 309643516a2dSKris Buschelman "MatInvertBlockDiagonal_SeqBAIJ", 309743516a2dSKris Buschelman MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3098f1af5d2fSBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 30993e90b805SBarry Smith "MatStoreValues_SeqBAIJ", 3100bc4b532fSSatish Balay MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3101f1af5d2fSBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 31023e90b805SBarry Smith "MatRetrieveValues_SeqBAIJ", 3103bc4b532fSSatish Balay MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3104f1af5d2fSBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 310527a8da17SBarry Smith "MatSeqBAIJSetColumnIndices_SeqBAIJ", 3106bc4b532fSSatish Balay MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3107a6175056SHong Zhang ierr = 
PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 3108273d9f13SBarry Smith "MatConvert_SeqBAIJ_SeqAIJ", 3109273d9f13SBarry Smith MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3110a0e1a404SHong Zhang ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C", 3111a0e1a404SHong Zhang "MatConvert_SeqBAIJ_SeqSBAIJ", 3112a0e1a404SHong Zhang MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3113a23d5eceSKris Buschelman ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 3114a23d5eceSKris Buschelman "MatSeqBAIJSetPreallocation_SeqBAIJ", 3115a23d5eceSKris Buschelman MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3116725b52f3SLisandro Dalcin ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C", 3117725b52f3SLisandro Dalcin "MatSeqBAIJSetPreallocationCSR_SeqBAIJ", 3118725b52f3SLisandro Dalcin MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 311917667f90SBarry Smith ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 31203a40ed3dSBarry Smith PetscFunctionReturn(0); 31212593348eSBarry Smith } 3122273d9f13SBarry Smith EXTERN_C_END 31232593348eSBarry Smith 31244a2ae208SSatish Balay #undef __FUNCT__ 3125b24902e0SBarry Smith #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 3126719d5645SBarry Smith PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues) 31272593348eSBarry Smith { 3128b24902e0SBarry Smith Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 31296849ba73SBarry Smith PetscErrorCode ierr; 3130a96a251dSBarry Smith PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3131de6a44a3SBarry Smith 31323a40ed3dSBarry Smith PetscFunctionBegin; 313329bbc08cSBarry Smith if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 31342593348eSBarry Smith 313533b91e9fSSatish Balay ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr); 3136b6490206SBarry Smith for 
(i=0; i<mbs; i++) { 31372593348eSBarry Smith c->imax[i] = a->imax[i]; 31382593348eSBarry Smith c->ilen[i] = a->ilen[i]; 31392593348eSBarry Smith } 31402593348eSBarry Smith 31412593348eSBarry Smith /* allocate the matrix space */ 3142a96a251dSBarry Smith ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 3143c4992f7dSBarry Smith c->singlemalloc = PETSC_TRUE; 3144c1ac3661SBarry Smith ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3145b6490206SBarry Smith if (mbs > 0) { 3146c1ac3661SBarry Smith ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 31472e8a6d31SBarry Smith if (cpvalues == MAT_COPY_VALUES) { 3148549d3d68SSatish Balay ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 31492e8a6d31SBarry Smith } else { 3150549d3d68SSatish Balay ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 31512593348eSBarry Smith } 31522593348eSBarry Smith } 31532593348eSBarry Smith c->roworiented = a->roworiented; 31542593348eSBarry Smith c->nonew = a->nonew; 3155d0f46423SBarry Smith ierr = PetscMapCopy(((PetscObject)A)->comm,A->rmap,C->rmap);CHKERRQ(ierr); 3156d0f46423SBarry Smith ierr = PetscMapCopy(((PetscObject)A)->comm,A->cmap,C->cmap);CHKERRQ(ierr); 31575c9eb25fSBarry Smith c->bs2 = a->bs2; 31585c9eb25fSBarry Smith c->mbs = a->mbs; 31595c9eb25fSBarry Smith c->nbs = a->nbs; 31602593348eSBarry Smith 31612593348eSBarry Smith if (a->diag) { 3162c1ac3661SBarry Smith ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 316352e6d16bSBarry Smith ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3164b6490206SBarry Smith for (i=0; i<mbs; i++) { 31652593348eSBarry Smith c->diag[i] = a->diag[i]; 31662593348eSBarry Smith } 316798305bb5SBarry Smith } else c->diag = 0; 31682593348eSBarry Smith c->nz = a->nz; 31692593348eSBarry Smith c->maxnz = a->maxnz; 31702593348eSBarry Smith c->solve_work = 0; 31717fc0212eSBarry Smith c->mult_work = 0; 
3172e6b907acSBarry Smith c->free_a = PETSC_TRUE; 3173e6b907acSBarry Smith c->free_ij = PETSC_TRUE; 3174273d9f13SBarry Smith C->preallocated = PETSC_TRUE; 3175273d9f13SBarry Smith C->assembled = PETSC_TRUE; 317688e51ccdSHong Zhang 317788e51ccdSHong Zhang c->compressedrow.use = a->compressedrow.use; 317888e51ccdSHong Zhang c->compressedrow.nrows = a->compressedrow.nrows; 317988e51ccdSHong Zhang c->compressedrow.checked = a->compressedrow.checked; 318088e51ccdSHong Zhang if ( a->compressedrow.checked && a->compressedrow.use){ 318188e51ccdSHong Zhang i = a->compressedrow.nrows; 318288e51ccdSHong Zhang ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr); 318388e51ccdSHong Zhang c->compressedrow.rindex = c->compressedrow.i + i + 1; 318488e51ccdSHong Zhang ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 318588e51ccdSHong Zhang ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 318688e51ccdSHong Zhang } else { 318788e51ccdSHong Zhang c->compressedrow.use = PETSC_FALSE; 318888e51ccdSHong Zhang c->compressedrow.i = PETSC_NULL; 318988e51ccdSHong Zhang c->compressedrow.rindex = PETSC_NULL; 319088e51ccdSHong Zhang } 319188e51ccdSHong Zhang C->same_nonzero = A->same_nonzero; 31927adad957SLisandro Dalcin ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 31933a40ed3dSBarry Smith PetscFunctionReturn(0); 31942593348eSBarry Smith } 31952593348eSBarry Smith 31964a2ae208SSatish Balay #undef __FUNCT__ 3197b24902e0SBarry Smith #define __FUNCT__ "MatDuplicate_SeqBAIJ" 3198b24902e0SBarry Smith PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3199b24902e0SBarry Smith { 3200b24902e0SBarry Smith PetscErrorCode ierr; 3201b24902e0SBarry Smith 3202b24902e0SBarry Smith PetscFunctionBegin; 32035c9eb25fSBarry Smith ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 3204d0f46423SBarry Smith ierr = 
MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 32055c9eb25fSBarry Smith ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3206719d5645SBarry Smith ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues); 3207b24902e0SBarry Smith PetscFunctionReturn(0); 3208b24902e0SBarry Smith } 3209b24902e0SBarry Smith 3210b24902e0SBarry Smith #undef __FUNCT__ 32114a2ae208SSatish Balay #define __FUNCT__ "MatLoad_SeqBAIJ" 3212a313700dSBarry Smith PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, const MatType type,Mat *A) 32132593348eSBarry Smith { 3214b6490206SBarry Smith Mat_SeqBAIJ *a; 32152593348eSBarry Smith Mat B; 32166849ba73SBarry Smith PetscErrorCode ierr; 3217b24ad042SBarry Smith PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 3218c1ac3661SBarry Smith PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3219c1ac3661SBarry Smith PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 3220c1ac3661SBarry Smith PetscInt *masked,nmask,tmp,bs2,ishift; 3221b24ad042SBarry Smith PetscMPIInt size; 3222b24ad042SBarry Smith int fd; 322387828ca2SBarry Smith PetscScalar *aa; 322419bcc07fSBarry Smith MPI_Comm comm = ((PetscObject)viewer)->comm; 32252593348eSBarry Smith 32263a40ed3dSBarry Smith PetscFunctionBegin; 32278c07d4e3SBarry Smith ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 32288c07d4e3SBarry Smith ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 32298c07d4e3SBarry Smith ierr = PetscOptionsEnd();CHKERRQ(ierr); 3230de6a44a3SBarry Smith bs2 = bs*bs; 3231b6490206SBarry Smith 3232d132466eSBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 323329bbc08cSBarry Smith if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 3234b0a32e0cSBarry Smith ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 32350752156aSBarry Smith ierr = 
PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3236552e946dSBarry Smith if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 32372593348eSBarry Smith M = header[1]; N = header[2]; nz = header[3]; 32382593348eSBarry Smith 3239d64ed03dSBarry Smith if (header[3] < 0) { 324029bbc08cSBarry Smith SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3241d64ed03dSBarry Smith } 3242d64ed03dSBarry Smith 324329bbc08cSBarry Smith if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 324435aab85fSBarry Smith 324535aab85fSBarry Smith /* 324635aab85fSBarry Smith This code adds extra rows to make sure the number of rows is 324735aab85fSBarry Smith divisible by the blocksize 324835aab85fSBarry Smith */ 3249b6490206SBarry Smith mbs = M/bs; 325035aab85fSBarry Smith extra_rows = bs - M + bs*(mbs); 325135aab85fSBarry Smith if (extra_rows == bs) extra_rows = 0; 325235aab85fSBarry Smith else mbs++; 325335aab85fSBarry Smith if (extra_rows) { 32541e2582c4SBarry Smith ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 325535aab85fSBarry Smith } 3256b6490206SBarry Smith 32572593348eSBarry Smith /* read in row lengths */ 3258c1ac3661SBarry Smith ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 32590752156aSBarry Smith ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 326035aab85fSBarry Smith for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 32612593348eSBarry Smith 3262b6490206SBarry Smith /* read in column indices */ 3263c1ac3661SBarry Smith ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 32640752156aSBarry Smith ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 326535aab85fSBarry Smith for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3266b6490206SBarry Smith 3267b6490206SBarry Smith /* loop over row lengths determining block row lengths */ 3268c1ac3661SBarry Smith ierr = 
PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr); 3269c1ac3661SBarry Smith ierr = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3270c1ac3661SBarry Smith ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 3271c1ac3661SBarry Smith ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 327235aab85fSBarry Smith masked = mask + mbs; 3273b6490206SBarry Smith rowcount = 0; nzcount = 0; 3274b6490206SBarry Smith for (i=0; i<mbs; i++) { 327535aab85fSBarry Smith nmask = 0; 3276b6490206SBarry Smith for (j=0; j<bs; j++) { 3277b6490206SBarry Smith kmax = rowlengths[rowcount]; 3278b6490206SBarry Smith for (k=0; k<kmax; k++) { 327935aab85fSBarry Smith tmp = jj[nzcount++]/bs; 328035aab85fSBarry Smith if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3281b6490206SBarry Smith } 3282b6490206SBarry Smith rowcount++; 3283b6490206SBarry Smith } 328435aab85fSBarry Smith browlengths[i] += nmask; 328535aab85fSBarry Smith /* zero out the mask elements we set */ 328635aab85fSBarry Smith for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3287b6490206SBarry Smith } 3288b6490206SBarry Smith 32892593348eSBarry Smith /* create our matrix */ 3290f69a0ea3SMatthew Knepley ierr = MatCreate(comm,&B); 3291f69a0ea3SMatthew Knepley ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows); 329278ae41b4SKris Buschelman ierr = MatSetType(B,type);CHKERRQ(ierr); 3293ab93d7beSBarry Smith ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr); 3294b6490206SBarry Smith a = (Mat_SeqBAIJ*)B->data; 32952593348eSBarry Smith 32962593348eSBarry Smith /* set matrix "i" values */ 3297de6a44a3SBarry Smith a->i[0] = 0; 3298b6490206SBarry Smith for (i=1; i<= mbs; i++) { 3299b6490206SBarry Smith a->i[i] = a->i[i-1] + browlengths[i-1]; 3300b6490206SBarry Smith a->ilen[i-1] = browlengths[i-1]; 33012593348eSBarry Smith } 3302b6490206SBarry Smith a->nz = 0; 3303b6490206SBarry Smith for (i=0; i<mbs; i++) a->nz += browlengths[i]; 
33042593348eSBarry Smith 3305b6490206SBarry Smith /* read in nonzero values */ 330687828ca2SBarry Smith ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 33070752156aSBarry Smith ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 330835aab85fSBarry Smith for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3309b6490206SBarry Smith 3310b6490206SBarry Smith /* set "a" and "j" values into matrix */ 3311b6490206SBarry Smith nzcount = 0; jcount = 0; 3312b6490206SBarry Smith for (i=0; i<mbs; i++) { 3313b6490206SBarry Smith nzcountb = nzcount; 331435aab85fSBarry Smith nmask = 0; 3315b6490206SBarry Smith for (j=0; j<bs; j++) { 3316b6490206SBarry Smith kmax = rowlengths[i*bs+j]; 3317b6490206SBarry Smith for (k=0; k<kmax; k++) { 331835aab85fSBarry Smith tmp = jj[nzcount++]/bs; 331935aab85fSBarry Smith if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3320b6490206SBarry Smith } 3321b6490206SBarry Smith } 3322de6a44a3SBarry Smith /* sort the masked values */ 3323433994e6SBarry Smith ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3324de6a44a3SBarry Smith 3325b6490206SBarry Smith /* set "j" values into matrix */ 3326b6490206SBarry Smith maskcount = 1; 332735aab85fSBarry Smith for (j=0; j<nmask; j++) { 332835aab85fSBarry Smith a->j[jcount++] = masked[j]; 3329de6a44a3SBarry Smith mask[masked[j]] = maskcount++; 3330b6490206SBarry Smith } 3331b6490206SBarry Smith /* set "a" values into matrix */ 3332de6a44a3SBarry Smith ishift = bs2*a->i[i]; 3333b6490206SBarry Smith for (j=0; j<bs; j++) { 3334b6490206SBarry Smith kmax = rowlengths[i*bs+j]; 3335b6490206SBarry Smith for (k=0; k<kmax; k++) { 3336de6a44a3SBarry Smith tmp = jj[nzcountb]/bs ; 3337de6a44a3SBarry Smith block = mask[tmp] - 1; 3338de6a44a3SBarry Smith point = jj[nzcountb] - bs*tmp; 3339de6a44a3SBarry Smith idx = ishift + bs2*block + j + bs*point; 3340375fe846SBarry Smith a->a[idx] = (MatScalar)aa[nzcountb++]; 3341b6490206SBarry Smith } 3342b6490206SBarry Smith } 334335aab85fSBarry Smith /* 
zero out the mask elements we set */ 334435aab85fSBarry Smith for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3345b6490206SBarry Smith } 334629bbc08cSBarry Smith if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3347b6490206SBarry Smith 3348606d414cSSatish Balay ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3349606d414cSSatish Balay ierr = PetscFree(browlengths);CHKERRQ(ierr); 3350606d414cSSatish Balay ierr = PetscFree(aa);CHKERRQ(ierr); 3351606d414cSSatish Balay ierr = PetscFree(jj);CHKERRQ(ierr); 3352606d414cSSatish Balay ierr = PetscFree(mask);CHKERRQ(ierr); 3353b6490206SBarry Smith 335478ae41b4SKris Buschelman ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 335578ae41b4SKris Buschelman ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 33569c01be13SBarry Smith ierr = MatView_Private(B);CHKERRQ(ierr); 335778ae41b4SKris Buschelman 335878ae41b4SKris Buschelman *A = B; 33593a40ed3dSBarry Smith PetscFunctionReturn(0); 33602593348eSBarry Smith } 33612593348eSBarry Smith 33624a2ae208SSatish Balay #undef __FUNCT__ 33634a2ae208SSatish Balay #define __FUNCT__ "MatCreateSeqBAIJ" 3364273d9f13SBarry Smith /*@C 3365273d9f13SBarry Smith MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3366273d9f13SBarry Smith compressed row) format. For good matrix assembly performance the 3367273d9f13SBarry Smith user should preallocate the matrix storage by setting the parameter nz 3368273d9f13SBarry Smith (or the array nnz). By setting these parameters accurately, performance 3369273d9f13SBarry Smith during matrix assembly can be increased by more than a factor of 50. 33702593348eSBarry Smith 3371273d9f13SBarry Smith Collective on MPI_Comm 3372273d9f13SBarry Smith 3373273d9f13SBarry Smith Input Parameters: 3374273d9f13SBarry Smith + comm - MPI communicator, set to PETSC_COMM_SELF 3375273d9f13SBarry Smith . bs - size of block 3376273d9f13SBarry Smith . m - number of rows 3377273d9f13SBarry Smith . 
n - number of columns 337835d8aa7fSBarry Smith . nz - number of nonzero blocks per block row (same for all rows) 337935d8aa7fSBarry Smith - nnz - array containing the number of nonzero blocks in the various block rows 3380273d9f13SBarry Smith (possibly different for each block row) or PETSC_NULL 3381273d9f13SBarry Smith 3382273d9f13SBarry Smith Output Parameter: 3383273d9f13SBarry Smith . A - the matrix 3384273d9f13SBarry Smith 3385175b88e8SBarry Smith It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3386ae1d86c5SBarry Smith MatXXXXSetPreallocation() paradigm instead of this routine directly. 3387175b88e8SBarry Smith [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3388175b88e8SBarry Smith 3389273d9f13SBarry Smith Options Database Keys: 3390273d9f13SBarry Smith . -mat_no_unroll - uses code that does not unroll the loops in the 3391273d9f13SBarry Smith block calculations (much slower) 3392273d9f13SBarry Smith . -mat_block_size - size of the blocks to use 3393273d9f13SBarry Smith 3394273d9f13SBarry Smith Level: intermediate 3395273d9f13SBarry Smith 3396273d9f13SBarry Smith Notes: 3397d1be2dadSMatthew Knepley The number of rows and columns must be divisible by blocksize. 3398d1be2dadSMatthew Knepley 339949a6f317SBarry Smith If the nnz parameter is given then the nz parameter is ignored 340049a6f317SBarry Smith 340135d8aa7fSBarry Smith A nonzero block is any block that has 1 or more nonzeros in it 340235d8aa7fSBarry Smith 3403273d9f13SBarry Smith The block AIJ format is fully compatible with standard Fortran 77 3404273d9f13SBarry Smith storage. That is, the stored row and column indices can begin at 3405273d9f13SBarry Smith either one (as in Fortran) or zero. See the users' manual for details. 3406273d9f13SBarry Smith 3407273d9f13SBarry Smith Specify the preallocated storage with either nz or nnz (not both). 
3408273d9f13SBarry Smith Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3409273d9f13SBarry Smith allocation. For additional details, see the users manual chapter on 3410273d9f13SBarry Smith matrices. 3411273d9f13SBarry Smith 3412273d9f13SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 3413273d9f13SBarry Smith @*/ 3414be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3415273d9f13SBarry Smith { 3416dfbe8321SBarry Smith PetscErrorCode ierr; 3417273d9f13SBarry Smith 3418273d9f13SBarry Smith PetscFunctionBegin; 3419f69a0ea3SMatthew Knepley ierr = MatCreate(comm,A);CHKERRQ(ierr); 3420f69a0ea3SMatthew Knepley ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3421273d9f13SBarry Smith ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3422ab93d7beSBarry Smith ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3423273d9f13SBarry Smith PetscFunctionReturn(0); 3424273d9f13SBarry Smith } 3425273d9f13SBarry Smith 34264a2ae208SSatish Balay #undef __FUNCT__ 34274a2ae208SSatish Balay #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3428273d9f13SBarry Smith /*@C 3429273d9f13SBarry Smith MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3430273d9f13SBarry Smith per row in the matrix. For good matrix assembly performance the 3431273d9f13SBarry Smith user should preallocate the matrix storage by setting the parameter nz 3432273d9f13SBarry Smith (or the array nnz). By setting these parameters accurately, performance 3433273d9f13SBarry Smith during matrix assembly can be increased by more than a factor of 50. 3434273d9f13SBarry Smith 3435273d9f13SBarry Smith Collective on MPI_Comm 3436273d9f13SBarry Smith 3437273d9f13SBarry Smith Input Parameters: 3438273d9f13SBarry Smith + A - the matrix 3439273d9f13SBarry Smith . bs - size of block 3440273d9f13SBarry Smith . 
nz - number of block nonzeros per block row (same for all rows) 3441273d9f13SBarry Smith - nnz - array containing the number of block nonzeros in the various block rows 3442273d9f13SBarry Smith (possibly different for each block row) or PETSC_NULL 3443273d9f13SBarry Smith 3444273d9f13SBarry Smith Options Database Keys: 3445273d9f13SBarry Smith . -mat_no_unroll - uses code that does not unroll the loops in the 3446273d9f13SBarry Smith block calculations (much slower) 3447273d9f13SBarry Smith . -mat_block_size - size of the blocks to use 3448273d9f13SBarry Smith 3449273d9f13SBarry Smith Level: intermediate 3450273d9f13SBarry Smith 3451273d9f13SBarry Smith Notes: 345249a6f317SBarry Smith If the nnz parameter is given then the nz parameter is ignored 345349a6f317SBarry Smith 3454aa95bbe8SBarry Smith You can call MatGetInfo() to get information on how effective the preallocation was; 3455aa95bbe8SBarry Smith for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3456aa95bbe8SBarry Smith You can also run with the option -info and look for messages with the string 3457aa95bbe8SBarry Smith malloc in them to see if additional memory allocation was needed. 3458aa95bbe8SBarry Smith 3459273d9f13SBarry Smith The block AIJ format is fully compatible with standard Fortran 77 3460273d9f13SBarry Smith storage. That is, the stored row and column indices can begin at 3461273d9f13SBarry Smith either one (as in Fortran) or zero. See the users' manual for details. 3462273d9f13SBarry Smith 3463273d9f13SBarry Smith Specify the preallocated storage with either nz or nnz (not both). 3464273d9f13SBarry Smith Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 3465273d9f13SBarry Smith allocation. For additional details, see the users manual chapter on 3466273d9f13SBarry Smith matrices. 
3467273d9f13SBarry Smith 3468aa95bbe8SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo() 3469273d9f13SBarry Smith @*/ 3470be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3471273d9f13SBarry Smith { 3472c1ac3661SBarry Smith PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 3473273d9f13SBarry Smith 3474273d9f13SBarry Smith PetscFunctionBegin; 3475a23d5eceSKris Buschelman ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 3476a23d5eceSKris Buschelman if (f) { 3477a23d5eceSKris Buschelman ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 3478273d9f13SBarry Smith } 3479273d9f13SBarry Smith PetscFunctionReturn(0); 3480273d9f13SBarry Smith } 3481a1d92eedSBarry Smith 3482c75a6043SHong Zhang #undef __FUNCT__ 3483725b52f3SLisandro Dalcin #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3484725b52f3SLisandro Dalcin /*@C 3485725b52f3SLisandro Dalcin MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3486725b52f3SLisandro Dalcin (the default sequential PETSc format). 3487725b52f3SLisandro Dalcin 3488725b52f3SLisandro Dalcin Collective on MPI_Comm 3489725b52f3SLisandro Dalcin 3490725b52f3SLisandro Dalcin Input Parameters: 3491725b52f3SLisandro Dalcin + A - the matrix 3492725b52f3SLisandro Dalcin . i - the indices into j for the start of each local row (starts with zero) 3493725b52f3SLisandro Dalcin . 
j - the column indices for each local row (starts with zero) these must be sorted for each row 3494725b52f3SLisandro Dalcin - v - optional values in the matrix 3495725b52f3SLisandro Dalcin 3496725b52f3SLisandro Dalcin Level: developer 3497725b52f3SLisandro Dalcin 3498725b52f3SLisandro Dalcin .keywords: matrix, aij, compressed row, sparse 3499725b52f3SLisandro Dalcin 3500725b52f3SLisandro Dalcin .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3501725b52f3SLisandro Dalcin @*/ 3502725b52f3SLisandro Dalcin PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3503725b52f3SLisandro Dalcin { 3504725b52f3SLisandro Dalcin PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]); 3505725b52f3SLisandro Dalcin 3506725b52f3SLisandro Dalcin PetscFunctionBegin; 3507725b52f3SLisandro Dalcin ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr); 3508725b52f3SLisandro Dalcin if (f) { 3509725b52f3SLisandro Dalcin ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr); 3510725b52f3SLisandro Dalcin } 3511725b52f3SLisandro Dalcin PetscFunctionReturn(0); 3512725b52f3SLisandro Dalcin } 3513725b52f3SLisandro Dalcin 3514725b52f3SLisandro Dalcin 3515725b52f3SLisandro Dalcin #undef __FUNCT__ 3516c75a6043SHong Zhang #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3517c75a6043SHong Zhang /*@ 3518c75a6043SHong Zhang MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements 3519c75a6043SHong Zhang (upper triangular entries in CSR format) provided by the user. 3520c75a6043SHong Zhang 3521c75a6043SHong Zhang Collective on MPI_Comm 3522c75a6043SHong Zhang 3523c75a6043SHong Zhang Input Parameters: 3524c75a6043SHong Zhang + comm - must be an MPI communicator of size 1 3525c75a6043SHong Zhang . bs - size of block 3526c75a6043SHong Zhang . 
m - number of rows 3527c75a6043SHong Zhang . n - number of columns 3528c75a6043SHong Zhang . i - row indices 3529c75a6043SHong Zhang . j - column indices 3530c75a6043SHong Zhang - a - matrix values 3531c75a6043SHong Zhang 3532c75a6043SHong Zhang Output Parameter: 3533c75a6043SHong Zhang . mat - the matrix 3534c75a6043SHong Zhang 3535c75a6043SHong Zhang Level: intermediate 3536c75a6043SHong Zhang 3537c75a6043SHong Zhang Notes: 3538c75a6043SHong Zhang The i, j, and a arrays are not copied by this routine, the user must free these arrays 3539c75a6043SHong Zhang once the matrix is destroyed 3540c75a6043SHong Zhang 3541c75a6043SHong Zhang You cannot set new nonzero locations into this matrix, that will generate an error. 3542c75a6043SHong Zhang 3543c75a6043SHong Zhang The i and j indices are 0 based 3544c75a6043SHong Zhang 3545c75a6043SHong Zhang .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ() 3546c75a6043SHong Zhang 3547c75a6043SHong Zhang @*/ 3548c75a6043SHong Zhang PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 3549c75a6043SHong Zhang { 3550c75a6043SHong Zhang PetscErrorCode ierr; 3551c75a6043SHong Zhang PetscInt ii; 3552c75a6043SHong Zhang Mat_SeqBAIJ *baij; 3553c75a6043SHong Zhang 3554c75a6043SHong Zhang PetscFunctionBegin; 3555c75a6043SHong Zhang if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3556c75a6043SHong Zhang if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3557c75a6043SHong Zhang 3558c75a6043SHong Zhang ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3559c75a6043SHong Zhang ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3560c75a6043SHong Zhang ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3561c75a6043SHong Zhang ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3562c75a6043SHong Zhang baij = (Mat_SeqBAIJ*)(*mat)->data; 
3563c75a6043SHong Zhang ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr); 3564c75a6043SHong Zhang 3565c75a6043SHong Zhang baij->i = i; 3566c75a6043SHong Zhang baij->j = j; 3567c75a6043SHong Zhang baij->a = a; 3568c75a6043SHong Zhang baij->singlemalloc = PETSC_FALSE; 3569c75a6043SHong Zhang baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3570e6b907acSBarry Smith baij->free_a = PETSC_FALSE; 3571e6b907acSBarry Smith baij->free_ij = PETSC_FALSE; 3572c75a6043SHong Zhang 3573c75a6043SHong Zhang for (ii=0; ii<m; ii++) { 3574c75a6043SHong Zhang baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3575c75a6043SHong Zhang #if defined(PETSC_USE_DEBUG) 3576c75a6043SHong Zhang if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3577c75a6043SHong Zhang #endif 3578c75a6043SHong Zhang } 3579c75a6043SHong Zhang #if defined(PETSC_USE_DEBUG) 3580c75a6043SHong Zhang for (ii=0; ii<baij->i[m]; ii++) { 3581c75a6043SHong Zhang if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3582c75a6043SHong Zhang if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3583c75a6043SHong Zhang } 3584c75a6043SHong Zhang #endif 3585c75a6043SHong Zhang 3586c75a6043SHong Zhang ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3587c75a6043SHong Zhang ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3588c75a6043SHong Zhang PetscFunctionReturn(0); 3589c75a6043SHong Zhang } 3590