xref: /petsc/src/mat/impls/baij/mpi/mmbaij.c (revision 3a40ed3dce77c081171d005ae1a6ff4bb9d13b6f)
1 #ifdef PETSC_RCS_HEADER
2 static char vcid[] = "$Id: mmbaij.c,v 1.14 1997/07/09 20:55:26 balay Exp bsmith $";
3 #endif
4 
5 
6 /*
7    Support for the parallel BAIJ matrix vector multiply
8 */
9 #include "src/mat/impls/baij/mpi/mpibaij.h"
10 #include "src/vec/vecimpl.h"
11 
12 #undef __FUNC__
13 #define __FUNC__ "MatSetUpMultiply_MPIBAIJ"
14 int MatSetUpMultiply_MPIBAIJ(Mat mat)
15 {
16   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data;
17   Mat_SeqBAIJ *B = (Mat_SeqBAIJ *) (baij->B->data);
18   int        Nbs = baij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray;
19   int        col,bs = baij->bs,*tmp,*stmp;
20   IS         from,to;
21   Vec        gvec;
22 
23   PetscFunctionBegin;
24   /* For the first stab we make an array as long as the number of columns */
25   /* mark those columns that are in baij->B */
26   indices = (int *) PetscMalloc( (Nbs+1)*sizeof(int) ); CHKPTRQ(indices);
27   PetscMemzero(indices,Nbs*sizeof(int));
28   for ( i=0; i<B->mbs; i++ ) {
29     for ( j=0; j<B->ilen[i]; j++ ) {
30       if (!indices[aj[B->i[i] + j]]) ec++;
31       indices[aj[B->i[i] + j] ] = 1;
32     }
33   }
34 
35   /* form array of columns we need */
36   garray = (int *) PetscMalloc( (ec+1)*sizeof(int) ); CHKPTRQ(garray);
37   tmp    = (int *) PetscMalloc( (ec*bs+1)*sizeof(int) ); CHKPTRQ(tmp)
38   ec = 0;
39   for ( i=0; i<Nbs; i++ ) {
40     if (indices[i]) garray[ec++] = i;
41   }
42 
43   /* make indices now point into garray */
44   for ( i=0; i<ec; i++ ) {
45     indices[garray[i]] = i;
46   }
47 
48   /* compact out the extra columns in B */
49   for ( i=0; i<B->mbs; i++ ) {
50     for ( j=0; j<B->ilen[i]; j++ ) {
51       aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
52     }
53   }
54   B->nbs = ec;
55   B->n   = ec*B->bs;
56   PetscFree(indices);
57 
58   for ( i=0,col=0; i<ec; i++ ) {
59     for ( j=0; j<bs; j++,col++) tmp[col] = garray[i]*bs+j;
60   }
61   /* create local vector that is used to scatter into */
62   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec); CHKERRQ(ierr);
63 
64   /* create two temporary index sets for building scatter-gather */
65 
66   /* ierr = ISCreateGeneral(PETSC_COMM_SELF,ec*bs,tmp,&from); CHKERRQ(ierr); */
67   for ( i=0,col=0; i<ec; i++ ) {
68     garray[i] = bs*garray[i];
69   }
70   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr);
71   for ( i=0,col=0; i<ec; i++ ) {
72     garray[i] = garray[i]/bs;
73   }
74 
75   stmp = (int *) PetscMalloc( (ec+1)*sizeof(int) ); CHKPTRQ(stmp);
76   for ( i=0; i<ec; i++ ) { stmp[i] = bs*i; }
77   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
78   PetscFree(stmp);
79 
80   /* create temporary global vector to generate scatter context */
81   /* this is inefficient, but otherwise we must do either
82      1) save garray until the first actual scatter when the vector is known or
83      2) have another way of generating a scatter context without a vector.*/
84   ierr = VecCreateMPI(mat->comm,baij->n,baij->N,&gvec); CHKERRQ(ierr);
85 
86   /* gnerate the scatter context */
87   ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);
88 
89   /*
90       Post the receives for the first matrix vector product. We sync-chronize after
91     this on the chance that the user immediately calls MatMult() after assemblying
92     the matrix.
93   */
94   ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
95   MPI_Barrier(mat->comm);
96 
97   PLogObjectParent(mat,baij->Mvctx);
98   PLogObjectParent(mat,baij->lvec);
99   PLogObjectParent(mat,from);
100   PLogObjectParent(mat,to);
101   baij->garray = garray;
102   PLogObjectMemory(mat,(ec+1)*sizeof(int));
103   ierr = ISDestroy(from); CHKERRQ(ierr);
104   ierr = ISDestroy(to); CHKERRQ(ierr);
105   ierr = VecDestroy(gvec);
106   PetscFree(tmp);
107   PetscFunctionReturn(0);
108 }
109 
110 
111 /*
112      Takes the local part of an already assembled MPIBAIJ matrix
113    and disassembles it. This is to allow new nonzeros into the matrix
114    that require more communication in the matrix vector multiply.
115    Thus certain data-structures must be rebuilt.
116 
117    Kind of slow! But that's what application programmers get when
118    they are sloppy.
119 */
120 #undef __FUNC__
121 #define __FUNC__ "DisAssemble_MPIBAIJ"
122 int DisAssemble_MPIBAIJ(Mat A)
123 {
124   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data;
125   Mat        B = baij->B,Bnew;
126   Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data;
127   int        ierr,i,j,mbs=Bbaij->mbs,n = baij->N,col,*garray=baij->garray;
128   int        k,bs=baij->bs,bs2=baij->bs2,*rvals,*nz,ec,m=Bbaij->m;
129   Scalar     *a=Bbaij->a;
130 
131   PetscFunctionBegin;
132   /* free stuff related to matrix-vec multiply */
133   ierr = VecGetSize(baij->lvec,&ec); /* needed for PLogObjectMemory below */
134   ierr = VecDestroy(baij->lvec); CHKERRQ(ierr); baij->lvec = 0;
135   ierr = VecScatterDestroy(baij->Mvctx); CHKERRQ(ierr); baij->Mvctx = 0;
136   if (baij->colmap) {
137     PetscFree(baij->colmap); baij->colmap = 0;
138     PLogObjectMemory(A,-Bbaij->nbs*sizeof(int));
139   }
140 
141   /* make sure that B is assembled so we can access its values */
142   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
143   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
144 
145   /* invent new B and copy stuff over */
146   nz = (int *) PetscMalloc( mbs*sizeof(int) ); CHKPTRQ(nz);
147   for ( i=0; i<mbs; i++ ) {
148     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
149   }
150   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,baij->bs,m,n,0,nz,&Bnew); CHKERRQ(ierr);
151   PetscFree(nz);
152 
153   rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals);
154   for ( i=0; i<mbs; i++ ) {
155     rvals[0] = bs*i;
156     for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; }
157     for ( j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++ ) {
158       col = garray[Bbaij->j[j]]*bs;
159       for (k=0; k<bs; k++ ) {
160         ierr = MatSetValues(Bnew,bs,rvals,1,&col,a+j*bs2,INSERT_VALUES);CHKERRQ(ierr);
161         col++;
162       }
163     }
164   }
165   PetscFree(baij->garray); baij->garray = 0;
166   PetscFree(rvals);
167   PLogObjectMemory(A,-ec*sizeof(int));
168   ierr = MatDestroy(B); CHKERRQ(ierr);
169   PLogObjectParent(A,Bnew);
170   baij->B = Bnew;
171   A->was_assembled = PETSC_FALSE;
172   PetscFunctionReturn(0);
173 }
174 
175 
176