1 /*$Id: mmbaij.c,v 1.27 1999/10/13 20:37:30 bsmith Exp bsmith $*/ 2 3 /* 4 Support for the parallel BAIJ matrix vector multiply 5 */ 6 #include "src/mat/impls/baij/mpi/mpibaij.h" 7 #include "src/vec/vecimpl.h" 8 9 #undef __FUNC__ 10 #define __FUNC__ "MatSetUpMultiply_MPIBAIJ" 11 int MatSetUpMultiply_MPIBAIJ(Mat mat) 12 { 13 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 14 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *) (baij->B->data); 15 int Nbs = baij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray; 16 int col,bs = baij->bs,*tmp,*stmp; 17 IS from,to; 18 Vec gvec; 19 #if defined (PETSC_USE_CTABLE) 20 PetscTable gid1_lid1; 21 PetscTablePosition tpos; 22 int gid, lid; 23 #endif 24 25 PetscFunctionBegin; 26 27 #if defined (PETSC_USE_CTABLE) 28 /* use a table - Mark Adams */ 29 PetscTableCreate(B->mbs,&gid1_lid1); 30 for ( i=0; i<B->mbs; i++ ) { 31 for ( j=0; j<B->ilen[i]; j++ ) { 32 int data,gid1 = aj[B->i[i]+j] + 1; 33 ierr = PetscTableFind(gid1_lid1,gid1,&data) ;CHKERRQ(ierr); 34 if (!data) { 35 /* one based table */ 36 ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr); 37 } 38 } 39 } 40 /* form array of columns we need */ 41 garray = (int *)PetscMalloc((ec+1)*sizeof(int));CHKPTRQ(garray); 42 tmp = (int *)PetscMalloc((ec*bs+1)*sizeof(int));CHKPTRQ(tmp); 43 ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 44 while (tpos) { 45 ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 46 gid--; lid--; 47 garray[lid] = gid; 48 } 49 ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); 50 /* qsort( garray, ec, sizeof(int), intcomparcarc ); */ 51 ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 52 for ( i=0; i<ec; i++ ) { 53 ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr); 54 } 55 /* compact out the extra columns in B */ 56 for ( i=0; i<B->mbs; i++ ) { 57 for ( j=0; j<B->ilen[i]; j++ ) { 58 int gid1 = aj[B->i[i] + j] + 1; 59 ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 60 lid --; 61 aj[B->i[i]+j] = lid; 62 } 63 } 64 B->nbs = ec; 65 B->n = ec*B->bs; 66 ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr); 67 /* Mark Adams */ 68 #else 69 /* For the first stab we make an array as long as the number of columns */ 70 /* mark those columns that are in baij->B */ 71 indices = (int *) PetscMalloc( (Nbs+1)*sizeof(int) );CHKPTRQ(indices); 72 ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr); 73 for ( i=0; i<B->mbs; i++ ) { 74 for ( j=0; j<B->ilen[i]; j++ ) { 75 if (!indices[aj[B->i[i] + j]]) ec++; 76 indices[aj[B->i[i] + j] ] = 1; 77 } 78 } 79 80 /* form array of columns we need */ 81 garray = (int *) PetscMalloc( (ec+1)*sizeof(int) );CHKPTRQ(garray); 82 tmp = (int *) PetscMalloc( (ec*bs+1)*sizeof(int) );CHKPTRQ(tmp) 83 ec = 0; 84 for ( i=0; i<Nbs; i++ ) { 85 if (indices[i]) { 86 garray[ec++] = i; 87 } 88 } 89 90 /* make indices now point into garray */ 91 for ( i=0; i<ec; i++ ) { 92 indices[garray[i]] = i; 93 } 94 95 /* compact out the extra columns in B */ 96 for ( i=0; i<B->mbs; i++ ) { 97 for ( j=0; j<B->ilen[i]; j++ ) { 98 aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 99 } 100 } 101 B->nbs = ec; 102 B->n = ec*B->bs; 103 ierr = PetscFree(indices);CHKERRQ(ierr); 104 #endif 105 106 for ( i=0,col=0; i<ec; i++ ) { 107 for ( j=0; j<bs; j++,col++) tmp[col] = garray[i]*bs+j; 108 } 109 /* create local vector that is used to scatter into */ 110 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr); 111 112 /* create two temporary index sets for building scatter-gather */ 113 114 /* ierr = ISCreateGeneral(PETSC_COMM_SELF,ec*bs,tmp,&from);CHKERRQ(ierr); */ 115 for ( i=0,col=0; i<ec; i++ ) { 116 garray[i] = bs*garray[i]; 117 } 118 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr); 119 for ( i=0,col=0; i<ec; i++ ) { 120 garray[i] = garray[i]/bs; 121 } 122 123 stmp = (int *) PetscMalloc( (ec+1)*sizeof(int) );CHKPTRQ(stmp); 124 for ( i=0; i<ec; i++ ) { stmp[i] = bs*i; } 125 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr); 126 ierr = PetscFree(stmp);CHKERRQ(ierr); 127 128 /* create temporary global vector to generate scatter context */ 129 /* this is inefficient, but otherwise we must do either 130 1) save garray until the first actual scatter when the vector is known or 131 2) have another way of generating a scatter context without a vector.*/ 132 ierr = VecCreateMPI(mat->comm,baij->n,baij->N,&gvec);CHKERRQ(ierr); 133 134 /* gnerate the scatter context */ 135 ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr); 136 137 /* 138 Post the receives for the first matrix vector product. We sync-chronize after 139 this on the chance that the user immediately calls MatMult() after assemblying 140 the matrix. 141 */ 142 ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 143 ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr); 144 145 PLogObjectParent(mat,baij->Mvctx); 146 PLogObjectParent(mat,baij->lvec); 147 PLogObjectParent(mat,from); 148 PLogObjectParent(mat,to); 149 baij->garray = garray; 150 PLogObjectMemory(mat,(ec+1)*sizeof(int)); 151 ierr = ISDestroy(from);CHKERRQ(ierr); 152 ierr = ISDestroy(to);CHKERRQ(ierr); 153 ierr = VecDestroy(gvec);CHKERRQ(ierr); 154 ierr = PetscFree(tmp);CHKERRQ(ierr); 155 PetscFunctionReturn(0); 156 } 157 158 159 /* 160 Takes the local part of an already assembled MPIBAIJ matrix 161 and disassembles it. This is to allow new nonzeros into the matrix 162 that require more communication in the matrix vector multiply. 163 Thus certain data-structures must be rebuilt. 164 165 Kind of slow! But that's what application programmers get when 166 they are sloppy. 167 */ 168 #undef __FUNC__ 169 #define __FUNC__ "DisAssemble_MPIBAIJ" 170 int DisAssemble_MPIBAIJ(Mat A) 171 { 172 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 173 Mat B = baij->B,Bnew; 174 Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 175 int ierr,i,j,mbs=Bbaij->mbs,n = baij->N,col,*garray=baij->garray; 176 int k,bs=baij->bs,bs2=baij->bs2,*rvals,*nz,ec,m=Bbaij->m; 177 Scalar *a=Bbaij->a; 178 179 PetscFunctionBegin; 180 /* free stuff related to matrix-vec multiply */ 181 ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PLogObjectMemory below */ 182 ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0; 183 ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0; 184 if (baij->colmap) { 185 #if defined (PETSC_USE_CTABLE) 186 ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr); 187 #else 188 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 189 baij->colmap = 0; 190 PLogObjectMemory(A,-Bbaij->nbs*sizeof(int)); 191 #endif 192 } 193 194 /* make sure that B is assembled so we can access its values */ 195 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 196 MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 197 198 /* invent new B and copy stuff over */ 199 nz = (int *) PetscMalloc( mbs*sizeof(int) );CHKPTRQ(nz); 200 for ( i=0; i<mbs; i++ ) { 201 nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 202 } 203 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,baij->bs,m,n,0,nz,&Bnew);CHKERRQ(ierr); 204 ierr = PetscFree(nz);CHKERRQ(ierr); 205 206 rvals = (int *) PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals); 207 for ( i=0; i<mbs; i++ ) { 208 rvals[0] = bs*i; 209 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 210 for ( j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++ ) { 211 col = garray[Bbaij->j[j]]*bs; 212 for (k=0; k<bs; k++ ) { 213 ierr = MatSetValues(Bnew,bs,rvals,1,&col,a+j*bs2,B->insertmode);CHKERRQ(ierr); 214 col++; 215 } 216 } 217 } 218 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 219 baij->garray = 0; 220 ierr = PetscFree(rvals);CHKERRQ(ierr); 221 PLogObjectMemory(A,-ec*sizeof(int)); 222 ierr = MatDestroy(B);CHKERRQ(ierr); 223 PLogObjectParent(A,Bnew); 224 baij->B = Bnew; 225 A->was_assembled = PETSC_FALSE; 226 PetscFunctionReturn(0); 227 } 228 229 230