xref: /petsc/src/mat/impls/sbaij/mpi/mmsbaij.c (revision b17e31d2f176b50387fb57342d7b706a7b527823)
1 
2 /*
3    Support for the parallel SBAIJ matrix vector multiply
4 */
5 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
6 
7 PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat)
8 {
9   Mat_MPISBAIJ   *sbaij = (Mat_MPISBAIJ*)mat->data;
10   Mat_SeqBAIJ    *B     = (Mat_SeqBAIJ*)(sbaij->B->data);
11   PetscErrorCode ierr;
12   PetscInt       Nbs = sbaij->Nbs,i,j,*indices,*aj = B->j,ec = 0,*garray,*sgarray;
13   PetscInt       bs  = mat->rmap->bs,*stmp,mbs=sbaij->mbs, vec_size,nt;
14   IS             from,to;
15   Vec            gvec;
16   PetscMPIInt    rank   =sbaij->rank,lsize,size=sbaij->size;
17   PetscInt       *owners=sbaij->rangebs,*ec_owner,k;
18   const PetscInt *sowners;
19   PetscScalar    *ptr;
20 
21   PetscFunctionBegin;
22   ierr = VecScatterDestroy(&sbaij->sMvctx);CHKERRQ(ierr);
23 
24   /* For the first stab we make an array as long as the number of columns */
25   /* mark those columns that are in sbaij->B */
26   ierr = PetscCalloc1(Nbs,&indices);CHKERRQ(ierr);
27   for (i=0; i<mbs; i++) {
28     for (j=0; j<B->ilen[i]; j++) {
29       if (!indices[aj[B->i[i] + j]]) ec++;
30       indices[aj[B->i[i] + j]] = 1;
31     }
32   }
33 
34   /* form arrays of columns we need */
35   ierr = PetscMalloc1(ec,&garray);CHKERRQ(ierr);
36   ierr = PetscMalloc2(2*ec,&sgarray,ec,&ec_owner);CHKERRQ(ierr);
37 
38   ec = 0;
39   for (j=0; j<size; j++) {
40     for (i=owners[j]; i<owners[j+1]; i++) {
41       if (indices[i]) {
42         garray[ec]   = i;
43         ec_owner[ec] = j;
44         ec++;
45       }
46     }
47   }
48 
49   /* make indices now point into garray */
50   for (i=0; i<ec; i++) indices[garray[i]] = i;
51 
52   /* compact out the extra columns in B */
53   for (i=0; i<mbs; i++) {
54     for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
55   }
56   B->nbs = ec;
57   ierr = PetscLayoutDestroy(&sbaij->B->cmap);CHKERRQ(ierr);
58   ierr = PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sbaij->B),ec*mat->rmap->bs,ec*mat->rmap->bs,mat->rmap->bs,&sbaij->B->cmap);CHKERRQ(ierr);
59   ierr = PetscFree(indices);CHKERRQ(ierr);
60 
61   /* create local vector that is used to scatter into */
62   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);CHKERRQ(ierr);
63 
64   /* create two temporary index sets for building scatter-gather */
65   ierr = PetscMalloc1(2*ec,&stmp);CHKERRQ(ierr);
66   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
67   for (i=0; i<ec; i++) stmp[i] = i;
68   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_COPY_VALUES,&to);CHKERRQ(ierr);
69 
70   /* generate the scatter context
71      -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but usefull for some applications */
72   ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);CHKERRQ(ierr);
73   ierr = VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);CHKERRQ(ierr);
74   ierr = VecDestroy(&gvec);CHKERRQ(ierr);
75 
76   sbaij->garray = garray;
77 
78   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->Mvctx);CHKERRQ(ierr);
79   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->lvec);CHKERRQ(ierr);
80   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr);
81   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr);
82 
83   ierr = ISDestroy(&from);CHKERRQ(ierr);
84   ierr = ISDestroy(&to);CHKERRQ(ierr);
85 
86   /* create parallel vector that is used by SBAIJ matrix to scatter from/into */
87   lsize = (mbs + ec)*bs;
88   ierr  = VecCreateMPI(PetscObjectComm((PetscObject)mat),lsize,PETSC_DETERMINE,&sbaij->slvec0);CHKERRQ(ierr);
89   ierr  = VecDuplicate(sbaij->slvec0,&sbaij->slvec1);CHKERRQ(ierr);
90   ierr  = VecGetSize(sbaij->slvec0,&vec_size);CHKERRQ(ierr);
91 
92   ierr = VecGetOwnershipRanges(sbaij->slvec0,&sowners);CHKERRQ(ierr);
93 
94   /* x index in the IS sfrom */
95   for (i=0; i<ec; i++) {
96     j          = ec_owner[i];
97     sgarray[i] = garray[i] + (sowners[j]/bs - owners[j]);
98   }
99   /* b index in the IS sfrom */
100   k = sowners[rank]/bs + mbs;
101   for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j;
102   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,sgarray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
103 
104   /* x index in the IS sto */
105   k = sowners[rank]/bs + mbs;
106   for (i=0; i<ec; i++) stmp[i] = (k + i);
107   /* b index in the IS sto */
108   for (i=ec; i<2*ec; i++) stmp[i] = sgarray[i-ec];
109 
110   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,PETSC_COPY_VALUES,&to);CHKERRQ(ierr);
111 
112   ierr = VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);CHKERRQ(ierr);
113 
114   ierr = VecGetLocalSize(sbaij->slvec1,&nt);CHKERRQ(ierr);
115   ierr = VecGetArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);
116   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,ptr,&sbaij->slvec1a);CHKERRQ(ierr);
117   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);CHKERRQ(ierr);
118   ierr = VecRestoreArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);
119 
120   ierr = VecGetArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);
121   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);CHKERRQ(ierr);
122   ierr = VecRestoreArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);
123 
124   ierr = PetscFree(stmp);CHKERRQ(ierr);
125   ierr = MPI_Barrier(PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
126 
127   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->sMvctx);CHKERRQ(ierr);
128   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec0);CHKERRQ(ierr);
129   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec1);CHKERRQ(ierr);
130   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec0b);CHKERRQ(ierr);
131   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec1a);CHKERRQ(ierr);
132   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec1b);CHKERRQ(ierr);
133   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr);
134   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr);
135 
136   ierr = PetscLogObjectMemory((PetscObject)mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
137   ierr = ISDestroy(&from);CHKERRQ(ierr);
138   ierr = ISDestroy(&to);CHKERRQ(ierr);
139   ierr = PetscFree2(sgarray,ec_owner);CHKERRQ(ierr);
140   PetscFunctionReturn(0);
141 }
142 
143 /*
144      Takes the local part of an already assembled MPISBAIJ matrix
145    and disassembles it. This is to allow new nonzeros into the matrix
146    that require more communication in the matrix vector multiply.
147    Thus certain data-structures must be rebuilt.
148 
149    Kind of slow! But that's what application programmers get when
150    they are sloppy.
151 */
152 PetscErrorCode MatDisAssemble_MPISBAIJ(Mat A)
153 {
154   Mat_MPISBAIJ   *baij  = (Mat_MPISBAIJ*)A->data;
155   Mat            B      = baij->B,Bnew;
156   Mat_SeqBAIJ    *Bbaij = (Mat_SeqBAIJ*)B->data;
157   PetscErrorCode ierr;
158   PetscInt       i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray;
159   PetscInt       k,bs=A->rmap->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->rmap->n;
160   MatScalar      *a = Bbaij->a;
161   PetscScalar    *atmp;
162 #if defined(PETSC_USE_REAL_MAT_SINGLE)
163   PetscInt l;
164 #endif
165 
166   PetscFunctionBegin;
167 #if defined(PETSC_USE_REAL_MAT_SINGLE)
168   ierr = PetscMalloc1(A->rmap->bs,&atmp);
169 #endif
170   /* free stuff related to matrix-vec multiply */
171   ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
172   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
173   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
174 
175   ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
176   ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
177   ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
178   ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
179   ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
180 
181   if (baij->colmap) {
182 #if defined(PETSC_USE_CTABLE)
183     ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
184 #else
185     ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
186     ierr = PetscLogObjectMemory((PetscObject)A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr);
187 #endif
188   }
189 
190   /* make sure that B is assembled so we can access its values */
191   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
192   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
193 
194   /* invent new B and copy stuff over */
195   ierr = PetscMalloc1(mbs,&nz);CHKERRQ(ierr);
196   for (i=0; i<mbs; i++) {
197     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
198   }
199   ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr);
200   ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr);
201   ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr);
202   ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr);
203   ierr = PetscFree(nz);CHKERRQ(ierr);
204 
205   if (Bbaij->nonew >= 0) { /* Inherit insertion error options (if positive). */
206     ((Mat_SeqSBAIJ*)Bnew->data)->nonew = Bbaij->nonew;
207   }
208 
209   /*
210    Ensure that B's nonzerostate is monotonically increasing.
211    Or should this follow the MatSetValues() loop to preserve B's nonzerstate across a MatDisAssemble() call?
212    */
213   Bnew->nonzerostate = B->nonzerostate;
214   ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
215   for (i=0; i<mbs; i++) {
216     rvals[0] = bs*i;
217     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
218     for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
219       col = garray[Bbaij->j[j]]*bs;
220       for (k=0; k<bs; k++) {
221 #if defined(PETSC_USE_REAL_MAT_SINGLE)
222         for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l];
223 #else
224         atmp = a+j*bs2 + k*bs;
225 #endif
226         ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
227         col++;
228       }
229     }
230   }
231 #if defined(PETSC_USE_REAL_MAT_SINGLE)
232   ierr = PetscFree(atmp);CHKERRQ(ierr);
233 #endif
234   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
235 
236   baij->garray = 0;
237 
238   ierr = PetscFree(rvals);CHKERRQ(ierr);
239   ierr = PetscLogObjectMemory((PetscObject)A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
240   ierr = MatDestroy(&B);CHKERRQ(ierr);
241   ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)Bnew);CHKERRQ(ierr);
242 
243   baij->B = Bnew;
244 
245   A->was_assembled = PETSC_FALSE;
246   PetscFunctionReturn(0);
247 }
248