1*79bdfe76SSatish Balay #ifndef lint 2*79bdfe76SSatish Balay static char vcid[] = "$Id: mbaijfact.c,v 1.1 1996/06/03 19:55:56 balay Exp $"; 3*79bdfe76SSatish Balay #endif 4*79bdfe76SSatish Balay 5*79bdfe76SSatish Balay #include "mpibaij.h" 6*79bdfe76SSatish Balay 7*79bdfe76SSatish Balay 8*79bdfe76SSatish Balay static int MatDestroy_MPIBAIJ(PetscObject obj) 9*79bdfe76SSatish Balay { 10*79bdfe76SSatish Balay Mat mat = (Mat) obj; 11*79bdfe76SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 12*79bdfe76SSatish Balay int ierr; 13*79bdfe76SSatish Balay 14*79bdfe76SSatish Balay #if defined(PETSC_LOG) 15*79bdfe76SSatish Balay PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N); 16*79bdfe76SSatish Balay #endif 17*79bdfe76SSatish Balay 18*79bdfe76SSatish Balay PetscFree(baij->rowners); 19*79bdfe76SSatish Balay ierr = MatDestroy(baij->A); CHKERRQ(ierr); 20*79bdfe76SSatish Balay ierr = MatDestroy(baij->B); CHKERRQ(ierr); 21*79bdfe76SSatish Balay if (baij->colmap) PetscFree(baij->colmap); 22*79bdfe76SSatish Balay if (baij->garray) PetscFree(baij->garray); 23*79bdfe76SSatish Balay if (baij->lvec) VecDestroy(baij->lvec); 24*79bdfe76SSatish Balay if (baij->Mvctx) VecScatterDestroy(baij->Mvctx); 25*79bdfe76SSatish Balay if (baij->rowvalues) PetscFree(baij->rowvalues); 26*79bdfe76SSatish Balay PetscFree(baij); 27*79bdfe76SSatish Balay PLogObjectDestroy(mat); 28*79bdfe76SSatish Balay PetscHeaderDestroy(mat); 29*79bdfe76SSatish Balay return 0; 30*79bdfe76SSatish Balay } 31*79bdfe76SSatish Balay 32*79bdfe76SSatish Balay /* -------------------------------------------------------------------*/ 33*79bdfe76SSatish Balay static struct _MatOps MatOps = { 34*79bdfe76SSatish Balay 0,0,0,0, 35*79bdfe76SSatish Balay 0,0,0,0, 36*79bdfe76SSatish Balay 0,0,0,0, 37*79bdfe76SSatish Balay 0,0,0,0, 38*79bdfe76SSatish Balay 0,0,0,0, 39*79bdfe76SSatish Balay 0,0,0,0, 40*79bdfe76SSatish Balay 0,0,0,0, 41*79bdfe76SSatish Balay 0,0,0,0, 42*79bdfe76SSatish Balay 0,0,0,0, 43*79bdfe76SSatish Balay 0,0,0,0, 44*79bdfe76SSatish Balay 0,0,0,0, 45*79bdfe76SSatish Balay 0,0,0,0, 46*79bdfe76SSatish Balay 0,0,0,0, 47*79bdfe76SSatish Balay 0}; 48*79bdfe76SSatish Balay 49*79bdfe76SSatish Balay 50*79bdfe76SSatish Balay /*@C 51*79bdfe76SSatish Balay MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format 52*79bdfe76SSatish Balay (block compressed row). For good matrix assembly performance 53*79bdfe76SSatish Balay the user should preallocate the matrix storage by setting the parameters 54*79bdfe76SSatish Balay d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 55*79bdfe76SSatish Balay performance can be increased by more than a factor of 50. 56*79bdfe76SSatish Balay 57*79bdfe76SSatish Balay Input Parameters: 58*79bdfe76SSatish Balay . comm - MPI communicator 59*79bdfe76SSatish Balay . bs - size of blockk 60*79bdfe76SSatish Balay . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 61*79bdfe76SSatish Balay . n - number of local columns (or PETSC_DECIDE to have calculated 62*79bdfe76SSatish Balay if N is given) 63*79bdfe76SSatish Balay . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 64*79bdfe76SSatish Balay . N - number of global columns (or PETSC_DECIDE to have calculated 65*79bdfe76SSatish Balay if n is given) 66*79bdfe76SSatish Balay . d_nz - number of block nonzeros per block row in diagonal portion of local 67*79bdfe76SSatish Balay submatrix (same for all local rows) 68*79bdfe76SSatish Balay . d_nzz - number of block nonzeros per block row in diagonal portion of local 69*79bdfe76SSatish Balay submatrix or null (possibly different for each row). You must leave 70*79bdfe76SSatish Balay room for the diagonal entry even if it is zero. 71*79bdfe76SSatish Balay . o_nz - number of block nonzeros per block row in off-diagonal portion of local 72*79bdfe76SSatish Balay submatrix (same for all local rows). 73*79bdfe76SSatish Balay . o_nzz - number of block nonzeros per block row in off-diagonal portion of local 74*79bdfe76SSatish Balay submatrix or null (possibly different for each row). 75*79bdfe76SSatish Balay 76*79bdfe76SSatish Balay Output Parameter: 77*79bdfe76SSatish Balay . A - the matrix 78*79bdfe76SSatish Balay 79*79bdfe76SSatish Balay Notes: 80*79bdfe76SSatish Balay The user MUST specify either the local or global matrix dimensions 81*79bdfe76SSatish Balay (possibly both). 82*79bdfe76SSatish Balay 83*79bdfe76SSatish Balay Storage Information: 84*79bdfe76SSatish Balay For a square global matrix we define each processor's diagonal portion 85*79bdfe76SSatish Balay to be its local rows and the corresponding columns (a square submatrix); 86*79bdfe76SSatish Balay each processor's off-diagonal portion encompasses the remainder of the 87*79bdfe76SSatish Balay local matrix (a rectangular submatrix). 88*79bdfe76SSatish Balay 89*79bdfe76SSatish Balay The user can specify preallocated storage for the diagonal part of 90*79bdfe76SSatish Balay the local submatrix with either d_nz or d_nnz (not both). Set 91*79bdfe76SSatish Balay d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 92*79bdfe76SSatish Balay memory allocation. Likewise, specify preallocated storage for the 93*79bdfe76SSatish Balay off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 94*79bdfe76SSatish Balay 95*79bdfe76SSatish Balay Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 96*79bdfe76SSatish Balay the figure below we depict these three local rows and all columns (0-11). 97*79bdfe76SSatish Balay 98*79bdfe76SSatish Balay $ 0 1 2 3 4 5 6 7 8 9 10 11 99*79bdfe76SSatish Balay $ ------------------- 100*79bdfe76SSatish Balay $ row 3 | o o o d d d o o o o o o 101*79bdfe76SSatish Balay $ row 4 | o o o d d d o o o o o o 102*79bdfe76SSatish Balay $ row 5 | o o o d d d o o o o o o 103*79bdfe76SSatish Balay $ ------------------- 104*79bdfe76SSatish Balay $ 105*79bdfe76SSatish Balay 106*79bdfe76SSatish Balay Thus, any entries in the d locations are stored in the d (diagonal) 107*79bdfe76SSatish Balay submatrix, and any entries in the o locations are stored in the 108*79bdfe76SSatish Balay o (off-diagonal) submatrix. Note that the d and the o submatrices are 109*79bdfe76SSatish Balay stored simply in the MATSEQAIJ format for compressed row storage. 110*79bdfe76SSatish Balay 111*79bdfe76SSatish Balay Now d_nz should indicate the number of nonzeros per row in the d matrix, 112*79bdfe76SSatish Balay and o_nz should indicate the number of nonzeros per row in the o matrix. 113*79bdfe76SSatish Balay In general, for PDE problems in which most nonzeros are near the diagonal, 114*79bdfe76SSatish Balay one expects d_nz >> o_nz. For additional details, see the users manual 115*79bdfe76SSatish Balay chapter on matrices and the file $(PETSC_DIR)/Performance. 116*79bdfe76SSatish Balay 117*79bdfe76SSatish Balay .keywords: matrix, aij, compressed row, sparse, parallel 118*79bdfe76SSatish Balay 119*79bdfe76SSatish Balay .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues() 120*79bdfe76SSatish Balay @*/ 121*79bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N, 122*79bdfe76SSatish Balay int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 123*79bdfe76SSatish Balay { 124*79bdfe76SSatish Balay Mat B; 125*79bdfe76SSatish Balay Mat_MPIBAIJ *b; 126*79bdfe76SSatish Balay int ierr, i,sum[2],work[2],mbs,nbs,Mbs,Nbs; 127*79bdfe76SSatish Balay 128*79bdfe76SSatish Balay if (bs < 1) SETERRQ(1,"MatCreateMPIBAIJ: invalid block size specified"); 129*79bdfe76SSatish Balay *A = 0; 130*79bdfe76SSatish Balay PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIBAIJ,comm); 131*79bdfe76SSatish Balay PLogObjectCreate(B); 132*79bdfe76SSatish Balay B->data = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b); 133*79bdfe76SSatish Balay PetscMemzero(b,sizeof(Mat_MPIBAIJ)); 134*79bdfe76SSatish Balay PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 135*79bdfe76SSatish Balay B->destroy = MatDestroy_MPIBAIJ; 136*79bdfe76SSatish Balay /* 137*79bdfe76SSatish Balay B->view = MatView_MPIBAIJ; 138*79bdfe76SSatish Balay */ 139*79bdfe76SSatish Balay B->factor = 0; 140*79bdfe76SSatish Balay B->assembled = PETSC_FALSE; 141*79bdfe76SSatish Balay 142*79bdfe76SSatish Balay b->insertmode = NOT_SET_VALUES; 143*79bdfe76SSatish Balay MPI_Comm_rank(comm,&b->rank); 144*79bdfe76SSatish Balay MPI_Comm_size(comm,&b->size); 145*79bdfe76SSatish Balay 146*79bdfe76SSatish Balay if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL)) 147*79bdfe76SSatish Balay SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 148*79bdfe76SSatish Balay 149*79bdfe76SSatish Balay if (M == PETSC_DECIDE || N == PETSC_DECIDE) { 150*79bdfe76SSatish Balay work[0] = m; work[1] = n; 151*79bdfe76SSatish Balay mbs = m/bs; nbs = n/bs; 152*79bdfe76SSatish Balay if (mbs*bs != m || nbs*n != nbs) SETERRQ(1,"MatCreateMPIBAIJ: No of local rows, cols must be divisible by blocksize"); 153*79bdfe76SSatish Balay MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm ); 154*79bdfe76SSatish Balay if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;} 155*79bdfe76SSatish Balay if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;} 156*79bdfe76SSatish Balay } 157*79bdfe76SSatish Balay if (m == PETSC_DECIDE) { 158*79bdfe76SSatish Balay Mbs = M/bs; 159*79bdfe76SSatish Balay if (Mbs*bs != M) SETERRQ(1,"MatCreateMPIBAIJ: No of global rows must be divisible by blocksize"); 160*79bdfe76SSatish Balay mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 161*79bdfe76SSatish Balay m = mbs*bs; 162*79bdfe76SSatish Balay } 163*79bdfe76SSatish Balay if (n == PETSC_DECIDE) { 164*79bdfe76SSatish Balay Nbs = N/bs; 165*79bdfe76SSatish Balay if (Nbs*bs != N) SETERRQ(1,"MatCreateMPIBAIJ: No of global cols must be divisible by blocksize"); 166*79bdfe76SSatish Balay nbs = Nbs/b->size + ((Nbs % b->size) > b->rank); 167*79bdfe76SSatish Balay n = nbs*bs; 168*79bdfe76SSatish Balay } 169*79bdfe76SSatish Balay 170*79bdfe76SSatish Balay b->m = m; B->m = m; 171*79bdfe76SSatish Balay b->n = n; B->n = n; 172*79bdfe76SSatish Balay b->N = N; B->N = N; 173*79bdfe76SSatish Balay b->M = M; B->M = M; 174*79bdfe76SSatish Balay b->bs = bs; 175*79bdfe76SSatish Balay b->bs2 = bs*bs; 176*79bdfe76SSatish Balay b->mbs = mbs; 177*79bdfe76SSatish Balay b->nbs = nbs; 178*79bdfe76SSatish Balay b->Mbs = Mbs; 179*79bdfe76SSatish Balay b->Nbs = Nbs; 180*79bdfe76SSatish Balay 181*79bdfe76SSatish Balay /* build local table of row and column ownerships */ 182*79bdfe76SSatish Balay b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners); 183*79bdfe76SSatish Balay PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ)); 184*79bdfe76SSatish Balay b->cowners = b->rowners + b->size + 1; 185*79bdfe76SSatish Balay MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm); 186*79bdfe76SSatish Balay b->rowners[0] = 0; 187*79bdfe76SSatish Balay for ( i=2; i<=b->size; i++ ) { 188*79bdfe76SSatish Balay b->rowners[i] += b->rowners[i-1]; 189*79bdfe76SSatish Balay } 190*79bdfe76SSatish Balay b->rstart = b->rowners[b->rank]; 191*79bdfe76SSatish Balay b->rend = b->rowners[b->rank+1]; 192*79bdfe76SSatish Balay MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm); 193*79bdfe76SSatish Balay b->cowners[0] = 0; 194*79bdfe76SSatish Balay for ( i=2; i<=b->size; i++ ) { 195*79bdfe76SSatish Balay b->cowners[i] += b->cowners[i-1]; 196*79bdfe76SSatish Balay } 197*79bdfe76SSatish Balay b->cstart = b->cowners[b->rank]; 198*79bdfe76SSatish Balay b->cend = b->cowners[b->rank+1]; 199*79bdfe76SSatish Balay 200*79bdfe76SSatish Balay if (d_nz == PETSC_DEFAULT) d_nz = 5; 201*79bdfe76SSatish Balay ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr); 202*79bdfe76SSatish Balay PLogObjectParent(B,b->A); 203*79bdfe76SSatish Balay if (o_nz == PETSC_DEFAULT) o_nz = 0; 204*79bdfe76SSatish Balay ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr); 205*79bdfe76SSatish Balay PLogObjectParent(B,b->B); 206*79bdfe76SSatish Balay 207*79bdfe76SSatish Balay /* build cache for off array entries formed */ 208*79bdfe76SSatish Balay ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr); 209*79bdfe76SSatish Balay b->colmap = 0; 210*79bdfe76SSatish Balay b->garray = 0; 211*79bdfe76SSatish Balay b->roworiented = 1; 212*79bdfe76SSatish Balay 213*79bdfe76SSatish Balay /* stuff used for matrix vector multiply */ 214*79bdfe76SSatish Balay b->lvec = 0; 215*79bdfe76SSatish Balay b->Mvctx = 0; 216*79bdfe76SSatish Balay 217*79bdfe76SSatish Balay /* stuff for MatGetRow() */ 218*79bdfe76SSatish Balay b->rowindices = 0; 219*79bdfe76SSatish Balay b->rowvalues = 0; 220*79bdfe76SSatish Balay b->getrowactive = PETSC_FALSE; 221*79bdfe76SSatish Balay 222*79bdfe76SSatish Balay *A = B; 223*79bdfe76SSatish Balay return 0; 224*79bdfe76SSatish Balay } 225