xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 79bdfe76e93bdaba25c041a7eff09eee2f1aab15)
1*79bdfe76SSatish Balay #ifndef lint
2*79bdfe76SSatish Balay static char vcid[] = "$Id: mbaijfact.c,v 1.1 1996/06/03 19:55:56 balay Exp $";
3*79bdfe76SSatish Balay #endif
4*79bdfe76SSatish Balay 
5*79bdfe76SSatish Balay #include "mpibaij.h"
6*79bdfe76SSatish Balay 
7*79bdfe76SSatish Balay 
8*79bdfe76SSatish Balay static int MatDestroy_MPIBAIJ(PetscObject obj)
9*79bdfe76SSatish Balay {
10*79bdfe76SSatish Balay   Mat         mat = (Mat) obj;
11*79bdfe76SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
12*79bdfe76SSatish Balay   int         ierr;
13*79bdfe76SSatish Balay 
14*79bdfe76SSatish Balay #if defined(PETSC_LOG)
15*79bdfe76SSatish Balay   PLogObjectState(obj,"Rows=%d, Cols=%d",baij->M,baij->N);
16*79bdfe76SSatish Balay #endif
17*79bdfe76SSatish Balay 
18*79bdfe76SSatish Balay   PetscFree(baij->rowners);
19*79bdfe76SSatish Balay   ierr = MatDestroy(baij->A); CHKERRQ(ierr);
20*79bdfe76SSatish Balay   ierr = MatDestroy(baij->B); CHKERRQ(ierr);
21*79bdfe76SSatish Balay   if (baij->colmap) PetscFree(baij->colmap);
22*79bdfe76SSatish Balay   if (baij->garray) PetscFree(baij->garray);
23*79bdfe76SSatish Balay   if (baij->lvec)   VecDestroy(baij->lvec);
24*79bdfe76SSatish Balay   if (baij->Mvctx)  VecScatterDestroy(baij->Mvctx);
25*79bdfe76SSatish Balay   if (baij->rowvalues) PetscFree(baij->rowvalues);
26*79bdfe76SSatish Balay   PetscFree(baij);
27*79bdfe76SSatish Balay   PLogObjectDestroy(mat);
28*79bdfe76SSatish Balay   PetscHeaderDestroy(mat);
29*79bdfe76SSatish Balay   return 0;
30*79bdfe76SSatish Balay }
31*79bdfe76SSatish Balay 
32*79bdfe76SSatish Balay /* -------------------------------------------------------------------*/
33*79bdfe76SSatish Balay static struct _MatOps MatOps = {
34*79bdfe76SSatish Balay   0,0,0,0,
35*79bdfe76SSatish Balay   0,0,0,0,
36*79bdfe76SSatish Balay   0,0,0,0,
37*79bdfe76SSatish Balay   0,0,0,0,
38*79bdfe76SSatish Balay   0,0,0,0,
39*79bdfe76SSatish Balay   0,0,0,0,
40*79bdfe76SSatish Balay   0,0,0,0,
41*79bdfe76SSatish Balay   0,0,0,0,
42*79bdfe76SSatish Balay   0,0,0,0,
43*79bdfe76SSatish Balay   0,0,0,0,
44*79bdfe76SSatish Balay   0,0,0,0,
45*79bdfe76SSatish Balay   0,0,0,0,
46*79bdfe76SSatish Balay   0,0,0,0,
47*79bdfe76SSatish Balay   0};
48*79bdfe76SSatish Balay 
49*79bdfe76SSatish Balay 
50*79bdfe76SSatish Balay /*@C
51*79bdfe76SSatish Balay    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
52*79bdfe76SSatish Balay    (block compressed row).  For good matrix assembly performance
53*79bdfe76SSatish Balay    the user should preallocate the matrix storage by setting the parameters
54*79bdfe76SSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
55*79bdfe76SSatish Balay    performance can be increased by more than a factor of 50.
56*79bdfe76SSatish Balay 
57*79bdfe76SSatish Balay    Input Parameters:
58*79bdfe76SSatish Balay .  comm - MPI communicator
59*79bdfe76SSatish Balay .  bs   - size of blockk
60*79bdfe76SSatish Balay .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
61*79bdfe76SSatish Balay .  n - number of local columns (or PETSC_DECIDE to have calculated
62*79bdfe76SSatish Balay            if N is given)
63*79bdfe76SSatish Balay .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
64*79bdfe76SSatish Balay .  N - number of global columns (or PETSC_DECIDE to have calculated
65*79bdfe76SSatish Balay            if n is given)
66*79bdfe76SSatish Balay .  d_nz  - number of block nonzeros per block row in diagonal portion of local
67*79bdfe76SSatish Balay            submatrix  (same for all local rows)
68*79bdfe76SSatish Balay .  d_nzz - number of block nonzeros per block row in diagonal portion of local
69*79bdfe76SSatish Balay            submatrix or null (possibly different for each row).  You must leave
70*79bdfe76SSatish Balay            room for the diagonal entry even if it is zero.
71*79bdfe76SSatish Balay .  o_nz  - number of block nonzeros per block row in off-diagonal portion of local
72*79bdfe76SSatish Balay            submatrix (same for all local rows).
73*79bdfe76SSatish Balay .  o_nzz - number of block nonzeros per block row in off-diagonal portion of local
74*79bdfe76SSatish Balay            submatrix or null (possibly different for each row).
75*79bdfe76SSatish Balay 
76*79bdfe76SSatish Balay    Output Parameter:
77*79bdfe76SSatish Balay .  A - the matrix
78*79bdfe76SSatish Balay 
79*79bdfe76SSatish Balay    Notes:
80*79bdfe76SSatish Balay    The user MUST specify either the local or global matrix dimensions
81*79bdfe76SSatish Balay    (possibly both).
82*79bdfe76SSatish Balay 
83*79bdfe76SSatish Balay    Storage Information:
84*79bdfe76SSatish Balay    For a square global matrix we define each processor's diagonal portion
85*79bdfe76SSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
86*79bdfe76SSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
87*79bdfe76SSatish Balay    local matrix (a rectangular submatrix).
88*79bdfe76SSatish Balay 
89*79bdfe76SSatish Balay    The user can specify preallocated storage for the diagonal part of
90*79bdfe76SSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
91*79bdfe76SSatish Balay    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
92*79bdfe76SSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
93*79bdfe76SSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
94*79bdfe76SSatish Balay 
95*79bdfe76SSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
96*79bdfe76SSatish Balay    the figure below we depict these three local rows and all columns (0-11).
97*79bdfe76SSatish Balay 
98*79bdfe76SSatish Balay $          0 1 2 3 4 5 6 7 8 9 10 11
99*79bdfe76SSatish Balay $         -------------------
100*79bdfe76SSatish Balay $  row 3  |  o o o d d d o o o o o o
101*79bdfe76SSatish Balay $  row 4  |  o o o d d d o o o o o o
102*79bdfe76SSatish Balay $  row 5  |  o o o d d d o o o o o o
103*79bdfe76SSatish Balay $         -------------------
104*79bdfe76SSatish Balay $
105*79bdfe76SSatish Balay 
106*79bdfe76SSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
107*79bdfe76SSatish Balay    submatrix, and any entries in the o locations are stored in the
108*79bdfe76SSatish Balay    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
109*79bdfe76SSatish Balay    stored simply in the MATSEQAIJ format for compressed row storage.
110*79bdfe76SSatish Balay 
111*79bdfe76SSatish Balay    Now d_nz should indicate the number of nonzeros per row in the d matrix,
112*79bdfe76SSatish Balay    and o_nz should indicate the number of nonzeros per row in the o matrix.
113*79bdfe76SSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
114*79bdfe76SSatish Balay    one expects d_nz >> o_nz.   For additional details, see the users manual
115*79bdfe76SSatish Balay    chapter on matrices and the file $(PETSC_DIR)/Performance.
116*79bdfe76SSatish Balay 
117*79bdfe76SSatish Balay .keywords: matrix, aij, compressed row, sparse, parallel
118*79bdfe76SSatish Balay 
119*79bdfe76SSatish Balay .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues()
120*79bdfe76SSatish Balay @*/
121*79bdfe76SSatish Balay int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,
122*79bdfe76SSatish Balay                     int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
123*79bdfe76SSatish Balay {
124*79bdfe76SSatish Balay   Mat          B;
125*79bdfe76SSatish Balay   Mat_MPIBAIJ  *b;
126*79bdfe76SSatish Balay   int          ierr, i,sum[2],work[2],mbs,nbs,Mbs,Nbs;
127*79bdfe76SSatish Balay 
128*79bdfe76SSatish Balay   if (bs < 1) SETERRQ(1,"MatCreateMPIBAIJ: invalid block size specified");
129*79bdfe76SSatish Balay   *A = 0;
130*79bdfe76SSatish Balay   PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATMPIBAIJ,comm);
131*79bdfe76SSatish Balay   PLogObjectCreate(B);
132*79bdfe76SSatish Balay   B->data       = (void *) (b = PetscNew(Mat_MPIBAIJ)); CHKPTRQ(b);
133*79bdfe76SSatish Balay   PetscMemzero(b,sizeof(Mat_MPIBAIJ));
134*79bdfe76SSatish Balay   PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps));
135*79bdfe76SSatish Balay   B->destroy    = MatDestroy_MPIBAIJ;
136*79bdfe76SSatish Balay   /*
137*79bdfe76SSatish Balay   B->view       = MatView_MPIBAIJ;
138*79bdfe76SSatish Balay   */
139*79bdfe76SSatish Balay   B->factor     = 0;
140*79bdfe76SSatish Balay   B->assembled  = PETSC_FALSE;
141*79bdfe76SSatish Balay 
142*79bdfe76SSatish Balay   b->insertmode = NOT_SET_VALUES;
143*79bdfe76SSatish Balay   MPI_Comm_rank(comm,&b->rank);
144*79bdfe76SSatish Balay   MPI_Comm_size(comm,&b->size);
145*79bdfe76SSatish Balay 
146*79bdfe76SSatish Balay   if (m == PETSC_DECIDE && (d_nnz != PETSC_NULL || o_nnz != PETSC_NULL))
147*79bdfe76SSatish Balay     SETERRQ(1,"MatCreateMPIAIJ:Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
148*79bdfe76SSatish Balay 
149*79bdfe76SSatish Balay   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
150*79bdfe76SSatish Balay     work[0] = m; work[1] = n;
151*79bdfe76SSatish Balay     mbs = m/bs; nbs = n/bs;
152*79bdfe76SSatish Balay     if (mbs*bs != m || nbs*n != nbs) SETERRQ(1,"MatCreateMPIBAIJ: No of local rows, cols must be divisible by blocksize");
153*79bdfe76SSatish Balay     MPI_Allreduce( work, sum,2,MPI_INT,MPI_SUM,comm );
154*79bdfe76SSatish Balay     if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
155*79bdfe76SSatish Balay     if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
156*79bdfe76SSatish Balay   }
157*79bdfe76SSatish Balay   if (m == PETSC_DECIDE) {
158*79bdfe76SSatish Balay     Mbs = M/bs;
159*79bdfe76SSatish Balay     if (Mbs*bs != M) SETERRQ(1,"MatCreateMPIBAIJ: No of global rows must be divisible by blocksize");
160*79bdfe76SSatish Balay     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
161*79bdfe76SSatish Balay     m   = mbs*bs;
162*79bdfe76SSatish Balay   }
163*79bdfe76SSatish Balay   if (n == PETSC_DECIDE) {
164*79bdfe76SSatish Balay     Nbs = N/bs;
165*79bdfe76SSatish Balay     if (Nbs*bs != N) SETERRQ(1,"MatCreateMPIBAIJ: No of global cols must be divisible by blocksize");
166*79bdfe76SSatish Balay     nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
167*79bdfe76SSatish Balay     n   = nbs*bs;
168*79bdfe76SSatish Balay   }
169*79bdfe76SSatish Balay 
170*79bdfe76SSatish Balay   b->m = m; B->m = m;
171*79bdfe76SSatish Balay   b->n = n; B->n = n;
172*79bdfe76SSatish Balay   b->N = N; B->N = N;
173*79bdfe76SSatish Balay   b->M = M; B->M = M;
174*79bdfe76SSatish Balay   b->bs  = bs;
175*79bdfe76SSatish Balay   b->bs2 = bs*bs;
176*79bdfe76SSatish Balay   b->mbs = mbs;
177*79bdfe76SSatish Balay   b->nbs = nbs;
178*79bdfe76SSatish Balay   b->Mbs = Mbs;
179*79bdfe76SSatish Balay   b->Nbs = Nbs;
180*79bdfe76SSatish Balay 
181*79bdfe76SSatish Balay   /* build local table of row and column ownerships */
182*79bdfe76SSatish Balay   b->rowners = (int *) PetscMalloc(2*(b->size+2)*sizeof(int)); CHKPTRQ(b->rowners);
183*79bdfe76SSatish Balay   PLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_MPIBAIJ));
184*79bdfe76SSatish Balay   b->cowners = b->rowners + b->size + 1;
185*79bdfe76SSatish Balay   MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);
186*79bdfe76SSatish Balay   b->rowners[0] = 0;
187*79bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
188*79bdfe76SSatish Balay     b->rowners[i] += b->rowners[i-1];
189*79bdfe76SSatish Balay   }
190*79bdfe76SSatish Balay   b->rstart = b->rowners[b->rank];
191*79bdfe76SSatish Balay   b->rend   = b->rowners[b->rank+1];
192*79bdfe76SSatish Balay   MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);
193*79bdfe76SSatish Balay   b->cowners[0] = 0;
194*79bdfe76SSatish Balay   for ( i=2; i<=b->size; i++ ) {
195*79bdfe76SSatish Balay     b->cowners[i] += b->cowners[i-1];
196*79bdfe76SSatish Balay   }
197*79bdfe76SSatish Balay   b->cstart = b->cowners[b->rank];
198*79bdfe76SSatish Balay   b->cend   = b->cowners[b->rank+1];
199*79bdfe76SSatish Balay 
200*79bdfe76SSatish Balay   if (d_nz == PETSC_DEFAULT) d_nz = 5;
201*79bdfe76SSatish Balay   ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A); CHKERRQ(ierr);
202*79bdfe76SSatish Balay   PLogObjectParent(B,b->A);
203*79bdfe76SSatish Balay   if (o_nz == PETSC_DEFAULT) o_nz = 0;
204*79bdfe76SSatish Balay   ierr = MatCreateSeqBAIJ(MPI_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B); CHKERRQ(ierr);
205*79bdfe76SSatish Balay   PLogObjectParent(B,b->B);
206*79bdfe76SSatish Balay 
207*79bdfe76SSatish Balay   /* build cache for off array entries formed */
208*79bdfe76SSatish Balay   ierr = StashBuild_Private(&b->stash); CHKERRQ(ierr);
209*79bdfe76SSatish Balay   b->colmap      = 0;
210*79bdfe76SSatish Balay   b->garray      = 0;
211*79bdfe76SSatish Balay   b->roworiented = 1;
212*79bdfe76SSatish Balay 
213*79bdfe76SSatish Balay   /* stuff used for matrix vector multiply */
214*79bdfe76SSatish Balay   b->lvec      = 0;
215*79bdfe76SSatish Balay   b->Mvctx     = 0;
216*79bdfe76SSatish Balay 
217*79bdfe76SSatish Balay   /* stuff for MatGetRow() */
218*79bdfe76SSatish Balay   b->rowindices   = 0;
219*79bdfe76SSatish Balay   b->rowvalues    = 0;
220*79bdfe76SSatish Balay   b->getrowactive = PETSC_FALSE;
221*79bdfe76SSatish Balay 
222*79bdfe76SSatish Balay   *A = B;
223*79bdfe76SSatish Balay   return 0;
224*79bdfe76SSatish Balay }
225