#ifndef lint
static char vcid[] = "$Id: mbaijfact.c,v 1.1 1996/06/03 19:55:56 balay Exp $";
#endif

#include "mpibaij.h"

static int MatDestroy_MPIBAIJ(PetscObject obj)
{
  Mat         mat = (Mat) obj;
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr;

#if defined(PETSC_LOG)
  PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N);
#endif

  /* free the local (diagonal and off-diagonal) pieces and the scatter context */
  PetscFree(baij->rowners);
  ierr = MatDestroy(baij->A); CHKERRQ(ierr);
  ierr = MatDestroy(baij->B); CHKERRQ(ierr);
  if (baij->colmap)    PetscFree(baij->colmap);
  if (baij->garray)    PetscFree(baij->garray);
  if (baij->lvec)      VecDestroy(baij->lvec);
  if (baij->Mvctx)     VecScatterDestroy(baij->Mvctx);
  if (baij->rowvalues) PetscFree(baij->rowvalues);
  PetscFree(baij);
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  return 0;
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps = {
  0,0,0,0,
  0,0,0,0,
  0,0,0,0,
  0,0,0,0,
  0,0,0,0,
  0,0,0,0,
  0,0,0,0,
  0,0,0,0,
  0,0,0,0,
  0,0,0,0,
  0,0,0,0,
  0,0,0,0,
  0,0,0,0,
  0};

/*@C
   MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance the user
   should preallocate the matrix storage by setting the parameters d_nz
   (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Input Parameters:
.  comm  - MPI communicator
.  bs    - size of block
.  m     - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n     - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M     - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N     - number of global columns (or PETSC_DECIDE to have calculated if n is given)
.  d_nz  - number of block nonzeros per block row in the diagonal portion of the
           local submatrix (same for all local rows)
.  d_nnz - number of block nonzeros per block row in the diagonal portion of the
           local submatrix, or null (possibly different for each row).  You must
           leave room for the diagonal entry even if it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of
           the local submatrix (same for all local rows)
.  o_nnz - number of block nonzeros per block row in the off-diagonal portion of
           the local submatrix, or null (possibly different for each row)

   Output Parameter:
.  A - the matrix

   Notes:
   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix.  In
   the figure below we depict these three local rows and all columns (0-11).
$             0 1 2 3 4 5 6 7 8 9 10 11
$            -------------------
$   row 3  | o o o d d d o o o o o o
$   row 4  | o o o d d d o o o o o o
$   row 5  | o o o d d d o o o o o o
$            -------------------
$

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per block row in the
   d matrix, and o_nz should indicate the number of block nonzeros per block
   row in the o matrix.  In general, for PDE problems in which most nonzeros
   are near the diagonal, one expects d_nz >> o_nz.  For additional details,
   see the users manual chapter on matrices and the file
   $(PETSC_DIR)/Performance.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues()
@*/
int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat         B;
  Mat_MPIBAIJ *b;
  int         ierr, i, sum[2], work[2], mbs, nbs, Mbs, Nbs;

  if (bs < 1) SETERRQ(1,"MatCreateMPIBAIJ: invalid block size specified");
  *A = 0;
  PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIBAIJ,comm);
  PLogObjectCreate(B);
  B->data = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b);
  PetscMemzero(b,sizeof(Mat_MPIBAIJ));
  PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
  B->destroy   = MatDestroy_MPIBAIJ;
  /*
  B->view      = MatView_MPIBAIJ;
  */
  B->factor    = 0;
  B->assembled = PETSC_FALSE;

  b->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&b->rank);
  MPI_Comm_size(comm,&b->size);

  if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
    SETERRQ(1,"MatCreateMPIBAIJ:Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");

  /* determine any sizes the caller left as PETSC_DECIDE; all sizes must be
     divisible by the block size bs */
  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    mbs = m/bs; nbs = n/bs;
    if (mbs*bs != m || nbs*bs != n)
      SETERRQ(1,"MatCreateMPIBAIJ: No of local rows, cols must be divisible by blocksize");
    MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
    if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
    if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
  }
  if (m == PETSC_DECIDE) {
    Mbs = M/bs;
    if (Mbs*bs != M) SETERRQ(1,"MatCreateMPIBAIJ: No of global rows must be divisible by blocksize");
    mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
    m   = mbs*bs;
  }
  if (n == PETSC_DECIDE) {
    Nbs = N/bs;
    if (Nbs*bs != N) SETERRQ(1,"MatCreateMPIBAIJ: No of global cols must be divisible by blocksize");
    nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
    n   = nbs*bs;
  }

  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;
  b->bs  = bs;
  b->bs2 = bs*bs;
  b->mbs = mbs;
  b->nbs = nbs;
  b->Mbs = Mbs;
  b->Nbs = Nbs;

  /* build local table of row and column ownerships */
  b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
  PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ));
  b->cowners = b->rowners + b->size + 1;
  /* after the prefix sum, rowners[i] is the first block row owned by process i */
  MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
  b->rowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
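
  /* Column ownership is gathered the same way as row ownership: each process
     contributes its local block-column count nbs, and a prefix sum over
     cowners[] gives the first (cstart) and one-past-last (cend) block column
     owned by this process. */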
  MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];

  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used for matrix vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  *A = B;
  return 0;
}
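
/*
   The fragment below is a usage sketch, not part of the library source: it
   illustrates how a caller might invoke MatCreateMPIBAIJ() with uniform
   preallocation, as described in the man page above.  The function name
   ExampleCreateMPIBAIJ and the numeric values (block size 3, 300 x 300
   global size, 5 diagonal and 2 off-diagonal block nonzeros per block row)
   are illustrative assumptions only.
*/
#if 0
static int ExampleCreateMPIBAIJ(MPI_Comm comm,Mat *mat)
{
  int ierr, bs = 3, M = 300, N = 300;  /* 100 x 100 grid of 3 x 3 blocks */

  /* let PETSc choose the local sizes; preallocate 5 diagonal-block and
     2 off-diagonal-block nonzeros per block row */
  ierr = MatCreateMPIBAIJ(comm,bs,PETSC_DECIDE,PETSC_DECIDE,M,N,
                          5,PETSC_NULL,2,PETSC_NULL,mat); CHKERRQ(ierr);

  /* insert entries with MatSetValues() and then call MatAssemblyBegin()/
     MatAssemblyEnd() before using the matrix */
  return 0;
}
#endif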