#ifndef lint
static char vcid[] = "$Id: mbaijfact.c,v 1.1 1996/06/03 19:55:56 balay Exp $";
#endif

#include "mpibaij.h"

/* Destroys the parallel matrix: frees the ownership table, the diagonal (A)
   and off-diagonal (B) sequential blocks, and any structures built for
   matrix-vector products and MatGetRow() */
static int MatDestroy_MPIBAIJ(PetscObject obj)
{
  Mat         mat = (Mat) obj;
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
  int         ierr;

#if defined(PETSC_LOG)
  PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N);
#endif

  PetscFree(baij->rowners);
  ierr = MatDestroy(baij->A); CHKERRQ(ierr);
  ierr = MatDestroy(baij->B); CHKERRQ(ierr);
  if (baij->colmap) PetscFree(baij->colmap);
  if (baij->garray) PetscFree(baij->garray);
  if (baij->lvec)   VecDestroy(baij->lvec);
  if (baij->Mvctx)  VecScatterDestroy(baij->Mvctx);
  if (baij->rowvalues) PetscFree(baij->rowvalues);
  PetscFree(baij);
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  return 0;
}

/* -------------------------------------------------------------------*/
/* function table for this matrix type; all entries are currently zero
   (unimplemented) */
static struct _MatOps MatOps = {
  0,0,0,0,
  0,0,0,0,
  0,0,0,0,
  0,0,0,0,
  0,0,0,0,
  0,0,0,0,
  0,0,0,0,
  0,0,0,0,
  0,0,0,0,
  0,0,0,0,
  0,0,0,0,
  0,0,0,0,
  0,0,0,0,
  0};

/*@C
   MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Input Parameters:
.  comm - MPI communicator
.  bs   - size of block
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have it calculated
           if N is given)
.  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have it calculated
           if n is given)
.  d_nz  - number of block nonzeros per block row in the diagonal portion of the
           local submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros per block row in the
           diagonal portion of the local submatrix, or null (possibly different
           for each row).  You must leave room for the diagonal entry even if
           it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of
           the local submatrix (same for all local rows)
.  o_nnz - array containing the number of block nonzeros per block row in the
           off-diagonal portion of the local submatrix, or null (possibly
           different for each row)

   Output Parameter:
.  A - the matrix

   Notes:
   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix.  In
   the figure below we depict these three local rows and all columns (0-11).

$          0 1 2 3 4 5 6 7 8 9 10 11
$         --------------------------
$  row 3  |  o o o d d d o o o o o o
$  row 4  |  o o o d d d o o o o o o
$  row 5  |  o o o d d d o o o o o o
$         --------------------------
$

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   each stored in the MATSEQBAIJ format for block compressed row storage.

   Now d_nz should indicate the number of block nonzeros per block row in
   the d matrix, and o_nz should indicate the number of block nonzeros per
   block row in the o matrix.  In general, for PDE problems in which most
   nonzeros are near the diagonal, one expects d_nz >> o_nz.  For additional
   details, see the users manual chapter on matrices and the file
   $(PETSC_DIR)/Performance.
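
   As an illustration (assuming bs=1 and that every location shown in the
   figure is actually nonzero), the rows pictured above would be preallocated
   with d_nz=3 and o_nz=9, or equivalently with d_nnz = {3,3,3} and
   o_nnz = {9,9,9}.

   A minimal calling sketch follows; the block size, global dimensions, and
   preallocation counts here are illustrative only:
$
$    Mat A;
$    int ierr;
$    ierr = MatCreateMPIBAIJ(MPI_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,100,100,
$                            5,PETSC_NULL,2,PETSC_NULL,&A); CHKERRQ(ierr);
$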

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues()
@*/
int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
                    int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat          B;
  Mat_MPIBAIJ  *b;
  int          ierr, i,sum[2],work[2],mbs,nbs,Mbs,Nbs;

  if (bs < 1) SETERRQ(1,"MatCreateMPIBAIJ: invalid block size specified");
  *A = 0;
  PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIBAIJ,comm);
  PLogObjectCreate(B);
  B->data       = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b);
  PetscMemzero(b,sizeof(Mat_MPIBAIJ));
  PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
  B->destroy    = MatDestroy_MPIBAIJ;
  /*
  B->view       = MatView_MPIBAIJ;
  */
  B->factor     = 0;
  B->assembled  = PETSC_FALSE;

  b->insertmode = NOT_SET_VALUES;
  MPI_Comm_rank(comm,&b->rank);
  MPI_Comm_size(comm,&b->size);

  if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
    SETERRQ(1,"MatCreateMPIBAIJ: Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");

  /* determine any missing global/local dimensions; every dimension must be
     divisible by the block size */
  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
    if (M == PETSC_DECIDE) M = sum[0];
    if (N == PETSC_DECIDE) N = sum[1];
  }
  Mbs = M/bs; Nbs = N/bs;
  if (Mbs*bs != M) SETERRQ(1,"MatCreateMPIBAIJ: No of global rows must be divisible by blocksize");
  if (Nbs*bs != N) SETERRQ(1,"MatCreateMPIBAIJ: No of global cols must be divisible by blocksize");
  if (m == PETSC_DECIDE) {
    mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
    m   = mbs*bs;
  } else {
    mbs = m/bs;
    if (mbs*bs != m) SETERRQ(1,"MatCreateMPIBAIJ: No of local rows must be divisible by blocksize");
  }
  if (n == PETSC_DECIDE) {
    nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
    n   = nbs*bs;
  } else {
    nbs = n/bs;
    if (nbs*bs != n) SETERRQ(1,"MatCreateMPIBAIJ: No of local cols must be divisible by blocksize");
  }
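  /* the Mbs/b->size + ((Mbs % b->size) > b->rank) formulas above spread any
     remainder blocks over the low-numbered ranks; e.g. (illustrative)
     Mbs = 10 over b->size = 4 gives ranks 0-3 local sizes 3, 3, 2, 2 */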

  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;
  b->bs  = bs;
  b->bs2 = bs*bs;
  b->mbs = mbs;
  b->nbs = nbs;
  b->Mbs = Mbs;
  b->Nbs = Nbs;

  /* build local table of row and column ownerships */
  b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
  PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ));
  b->cowners = b->rowners + b->size + 1;
  MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
  b->rowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart = b->rowners[b->rank];
  b->rend   = b->rowners[b->rank+1];
  MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
  b->cowners[0] = 0;
  for ( i=2; i<=b->size; i++ ) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart = b->cowners[b->rank];
  b->cend   = b->cowners[b->rank+1];
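  /* e.g. (illustrative): if 3 processes own 2, 3, and 2 block rows, the
     allgather-plus-prefix-sum above yields rowners = {0,2,5,7}, so rank 1
     owns block rows [rstart,rend) = [2,5) */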

  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  /* b->A holds the (m x n) diagonal block; b->B holds the off-diagonal block,
     created with the full global column dimension N */
  ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off-processor entries formed during assembly */
  ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = 1;

  /* stuff used for matrix-vector multiply */
  b->lvec      = 0;
  b->Mvctx     = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  *A = B;
  return 0;
}