1 /*$Id: mmbaij.c,v 1.33 2000/05/10 16:40:56 bsmith Exp bsmith $*/ 2 3 /* 4 Support for the parallel BAIJ matrix vector multiply 5 */ 6 #include "src/mat/impls/baij/mpi/mpibaij.h" 7 #include "src/vec/vecimpl.h" 8 EXTERN int MatSetValues_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 9 10 #undef __FUNC__ 11 #define __FUNC__ /*<a name=""></a>*/"MatSetUpMultiply_MPIBAIJ" 12 int MatSetUpMultiply_MPIBAIJ(Mat mat) 13 { 14 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 15 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(baij->B->data); 16 int Nbs = baij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray; 17 int col,bs = baij->bs,*tmp,*stmp; 18 IS from,to; 19 Vec gvec; 20 #if defined (PETSC_USE_CTABLE) 21 PetscTable gid1_lid1; 22 PetscTablePosition tpos; 23 int gid,lid; 24 #endif 25 26 PetscFunctionBegin; 27 28 #if defined (PETSC_USE_CTABLE) 29 /* use a table - Mark Adams */ 30 PetscTableCreate(B->mbs,&gid1_lid1); 31 for (i=0; i<B->mbs; i++) { 32 for (j=0; j<B->ilen[i]; j++) { 33 int data,gid1 = aj[B->i[i]+j] + 1; 34 ierr = PetscTableFind(gid1_lid1,gid1,&data) ;CHKERRQ(ierr); 35 if (!data) { 36 /* one based table */ 37 ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr); 38 } 39 } 40 } 41 /* form array of columns we need */ 42 garray = (int *)PetscMalloc((ec+1)*sizeof(int));CHKPTRQ(garray); 43 tmp = (int *)PetscMalloc((ec*bs+1)*sizeof(int));CHKPTRQ(tmp); 44 ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 45 while (tpos) { 46 ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 47 gid--; lid--; 48 garray[lid] = gid; 49 } 50 ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); 51 /* qsort(garray, ec, sizeof(int), intcomparcarc); */ 52 ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 53 for (i=0; i<ec; i++) { 54 ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr); 55 } 56 /* compact out the extra columns in B */ 57 for (i=0; i<B->mbs; i++) { 58 for (j=0; j<B->ilen[i]; j++) { 59 int gid1 = aj[B->i[i] + j] + 1; 60 ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 61 lid --; 62 aj[B->i[i]+j] = lid; 63 } 64 } 65 B->nbs = ec; 66 B->n = ec*B->bs; 67 ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr); 68 /* Mark Adams */ 69 #else 70 /* For the first stab we make an array as long as the number of columns */ 71 /* mark those columns that are in baij->B */ 72 indices = (int*)PetscMalloc((Nbs+1)*sizeof(int));CHKPTRQ(indices); 73 ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr); 74 for (i=0; i<B->mbs; i++) { 75 for (j=0; j<B->ilen[i]; j++) { 76 if (!indices[aj[B->i[i] + j]]) ec++; 77 indices[aj[B->i[i] + j] ] = 1; 78 } 79 } 80 81 /* form array of columns we need */ 82 garray = (int*)PetscMalloc((ec+1)*sizeof(int));CHKPTRQ(garray); 83 tmp = (int*)PetscMalloc((ec*bs+1)*sizeof(int));CHKPTRQ(tmp) 84 ec = 0; 85 for (i=0; i<Nbs; i++) { 86 if (indices[i]) { 87 garray[ec++] = i; 88 } 89 } 90 91 /* make indices now point into garray */ 92 for (i=0; i<ec; i++) { 93 indices[garray[i]] = i; 94 } 95 96 /* compact out the extra columns in B */ 97 for (i=0; i<B->mbs; i++) { 98 for (j=0; j<B->ilen[i]; j++) { 99 aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 100 } 101 } 102 B->nbs = ec; 103 B->n = ec*B->bs; 104 ierr = PetscFree(indices);CHKERRQ(ierr); 105 #endif 106 107 for (i=0,col=0; i<ec; i++) { 108 for (j=0; j<bs; j++,col++) tmp[col] = garray[i]*bs+j; 109 } 110 /* create local vector that is used to scatter into */ 111 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr); 112 113 /* create two temporary index sets for building scatter-gather */ 114 for (i=0,col=0; i<ec; i++) { 115 garray[i] = bs*garray[i]; 116 } 117 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr); 118 for (i=0,col=0; i<ec; i++) { 119 garray[i] = garray[i]/bs; 120 } 121 122 stmp = (int*)PetscMalloc((ec+1)*sizeof(int));CHKPTRQ(stmp); 123 for (i=0; i<ec; i++) { stmp[i] = bs*i; } 124 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr); 125 ierr = PetscFree(stmp);CHKERRQ(ierr); 126 127 /* create temporary global vector to generate scatter context */ 128 /* this is inefficient, but otherwise we must do either 129 1) save garray until the first actual scatter when the vector is known or 130 2) have another way of generating a scatter context without a vector.*/ 131 ierr = VecCreateMPI(mat->comm,baij->n,baij->N,&gvec);CHKERRQ(ierr); 132 133 /* gnerate the scatter context */ 134 ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr); 135 136 /* 137 Post the receives for the first matrix vector product. We sync-chronize after 138 this on the chance that the user immediately calls MatMult() after assemblying 139 the matrix. 140 */ 141 ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 142 ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr); 143 144 PLogObjectParent(mat,baij->Mvctx); 145 PLogObjectParent(mat,baij->lvec); 146 PLogObjectParent(mat,from); 147 PLogObjectParent(mat,to); 148 baij->garray = garray; 149 PLogObjectMemory(mat,(ec+1)*sizeof(int)); 150 ierr = ISDestroy(from);CHKERRQ(ierr); 151 ierr = ISDestroy(to);CHKERRQ(ierr); 152 ierr = VecDestroy(gvec);CHKERRQ(ierr); 153 ierr = PetscFree(tmp);CHKERRQ(ierr); 154 PetscFunctionReturn(0); 155 } 156 157 158 /* 159 Takes the local part of an already assembled MPIBAIJ matrix 160 and disassembles it. This is to allow new nonzeros into the matrix 161 that require more communication in the matrix vector multiply. 162 Thus certain data-structures must be rebuilt. 163 164 Kind of slow! But that's what application programmers get when 165 they are sloppy. 166 */ 167 #undef __FUNC__ 168 #define __FUNC__ /*<a name=""></a>*/"DisAssemble_MPIBAIJ" 169 int DisAssemble_MPIBAIJ(Mat A) 170 { 171 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 172 Mat B = baij->B,Bnew; 173 Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 174 int ierr,i,j,mbs=Bbaij->mbs,n = baij->N,col,*garray=baij->garray; 175 int k,bs=baij->bs,bs2=baij->bs2,*rvals,*nz,ec,m=Bbaij->m; 176 MatScalar *a = Bbaij->a; 177 #if defined(PETSC_USE_MAT_SINGLE) 178 Scalar *atmp = (Scalar*)PetscMalloc(baij->bs*sizeof(Scalar)); 179 int l; 180 #else 181 Scalar *atmp; 182 #endif 183 184 PetscFunctionBegin; 185 /* free stuff related to matrix-vec multiply */ 186 ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PLogObjectMemory below */ 187 ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0; 188 ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0; 189 if (baij->colmap) { 190 #if defined (PETSC_USE_CTABLE) 191 ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr); 192 #else 193 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 194 baij->colmap = 0; 195 PLogObjectMemory(A,-Bbaij->nbs*sizeof(int)); 196 #endif 197 } 198 199 /* make sure that B is assembled so we can access its values */ 200 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 201 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 202 203 /* invent new B and copy stuff over */ 204 nz = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(nz); 205 for (i=0; i<mbs; i++) { 206 nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 207 } 208 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,baij->bs,m,n,0,nz,&Bnew);CHKERRQ(ierr); 209 ierr = PetscFree(nz);CHKERRQ(ierr); 210 211 rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals); 212 for (i=0; i<mbs; i++) { 213 rvals[0] = bs*i; 214 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 215 for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) { 216 col = garray[Bbaij->j[j]]*bs; 217 for (k=0; k<bs; k++) { 218 #if defined(PETSC_USE_MAT_SINGLE) 219 for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l]; 220 #else 221 atmp = a+j*bs2; 222 #endif 223 ierr = MatSetValues_SeqBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr); 224 col++; 225 } 226 } 227 } 228 #if defined(PETSC_USE_MAT_SINGLE) 229 ierr = PetscFree(atmp);CHKERRQ(ierr); 230 #endif 231 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 232 baij->garray = 0; 233 ierr = PetscFree(rvals);CHKERRQ(ierr); 234 PLogObjectMemory(A,-ec*sizeof(int)); 235 ierr = MatDestroy(B);CHKERRQ(ierr); 236 PLogObjectParent(A,Bnew); 237 baij->B = Bnew; 238 A->was_assembled = PETSC_FALSE; 239 PetscFunctionReturn(0); 240 } 241 242 243 244