1 #ifdef PETSC_RCS_HEADER 2 static char vcid[] = "$Id: mmbaij.c,v 1.25 1999/06/08 22:56:19 balay Exp balay $"; 3 #endif 4 5 6 /* 7 Support for the parallel BAIJ matrix vector multiply 8 */ 9 #include "src/mat/impls/baij/mpi/mpibaij.h" 10 #include "src/vec/vecimpl.h" 11 12 #undef __FUNC__ 13 #define __FUNC__ "MatSetUpMultiply_MPIBAIJ" 14 int MatSetUpMultiply_MPIBAIJ(Mat mat) 15 { 16 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 17 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *) (baij->B->data); 18 int Nbs = baij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray; 19 int col,bs = baij->bs,*tmp,*stmp; 20 IS from,to; 21 Vec gvec; 22 #if defined (PETSC_USE_CTABLE) 23 Table gid1_lid1; 24 CTablePos tpos; 25 int gid, lid; 26 #endif 27 28 PetscFunctionBegin; 29 30 #if defined (PETSC_USE_CTABLE) 31 /* use a table - Mark Adams */ 32 TableCreate(B->mbs,&gid1_lid1); 33 for ( i=0; i<B->mbs; i++ ) { 34 for ( j=0; j<B->ilen[i]; j++ ) { 35 int data,gid1 = aj[B->i[i]+j] + 1; 36 ierr = TableFind(gid1_lid1,gid1,&data) ;CHKERRQ(ierr); 37 if (!data) { 38 /* one based table */ 39 ierr = TableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr); 40 } 41 } 42 } 43 /* form array of columns we need */ 44 garray = (int *)PetscMalloc((ec+1)*sizeof(int));CHKPTRQ(garray); 45 tmp = (int *)PetscMalloc((ec*bs+1)*sizeof(int));CHKPTRQ(tmp); 46 ierr = TableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 47 while (tpos) { 48 ierr = TableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 49 gid--; lid--; 50 garray[lid] = gid; 51 } 52 ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); 53 /* qsort( garray, ec, sizeof(int), intcomparcarc ); */ 54 TableRemoveAll(gid1_lid1); 55 for ( i=0; i<ec; i++ ) { 56 ierr = TableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr); 57 } 58 /* compact out the extra columns in B */ 59 for ( i=0; i<B->mbs; i++ ) { 60 for ( j=0; j<B->ilen[i]; j++ ) { 61 int gid1 = aj[B->i[i] + j] + 1; 62 ierr = TableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 63 lid --; 64 aj[B->i[i]+j] = lid; 65 } 66 } 67 B->nbs = ec; 68 B->n = ec*B->bs; 69 TableDelete(gid1_lid1); 70 /* Mark Adams */ 71 #else 72 /* For the first stab we make an array as long as the number of columns */ 73 /* mark those columns that are in baij->B */ 74 indices = (int *) PetscMalloc( (Nbs+1)*sizeof(int) );CHKPTRQ(indices); 75 ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr); 76 for ( i=0; i<B->mbs; i++ ) { 77 for ( j=0; j<B->ilen[i]; j++ ) { 78 if (!indices[aj[B->i[i] + j]]) ec++; 79 indices[aj[B->i[i] + j] ] = 1; 80 } 81 } 82 83 /* form array of columns we need */ 84 garray = (int *) PetscMalloc( (ec+1)*sizeof(int) );CHKPTRQ(garray); 85 tmp = (int *) PetscMalloc( (ec*bs+1)*sizeof(int) );CHKPTRQ(tmp) 86 ec = 0; 87 for ( i=0; i<Nbs; i++ ) { 88 if (indices[i]) { 89 garray[ec++] = i; 90 } 91 } 92 93 /* make indices now point into garray */ 94 for ( i=0; i<ec; i++ ) { 95 indices[garray[i]] = i; 96 } 97 98 /* compact out the extra columns in B */ 99 for ( i=0; i<B->mbs; i++ ) { 100 for ( j=0; j<B->ilen[i]; j++ ) { 101 aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 102 } 103 } 104 B->nbs = ec; 105 B->n = ec*B->bs; 106 ierr = PetscFree(indices);CHKERRQ(ierr); 107 #endif 108 109 for ( i=0,col=0; i<ec; i++ ) { 110 for ( j=0; j<bs; j++,col++) tmp[col] = garray[i]*bs+j; 111 } 112 /* create local vector that is used to scatter into */ 113 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr); 114 115 /* create two temporary index sets for building scatter-gather */ 116 117 /* ierr = ISCreateGeneral(PETSC_COMM_SELF,ec*bs,tmp,&from);CHKERRQ(ierr); */ 118 for ( i=0,col=0; i<ec; i++ ) { 119 garray[i] = bs*garray[i]; 120 } 121 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr); 122 for ( i=0,col=0; i<ec; i++ ) { 123 garray[i] = garray[i]/bs; 124 } 125 126 stmp = (int *) PetscMalloc( (ec+1)*sizeof(int) );CHKPTRQ(stmp); 127 for ( i=0; i<ec; i++ ) { stmp[i] = bs*i; } 128 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr); 129 ierr = PetscFree(stmp);CHKERRQ(ierr); 130 131 /* create temporary global vector to generate scatter context */ 132 /* this is inefficient, but otherwise we must do either 133 1) save garray until the first actual scatter when the vector is known or 134 2) have another way of generating a scatter context without a vector.*/ 135 ierr = VecCreateMPI(mat->comm,baij->n,baij->N,&gvec);CHKERRQ(ierr); 136 137 /* gnerate the scatter context */ 138 ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr); 139 140 /* 141 Post the receives for the first matrix vector product. We sync-chronize after 142 this on the chance that the user immediately calls MatMult() after assemblying 143 the matrix. 144 */ 145 ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 146 ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr); 147 148 PLogObjectParent(mat,baij->Mvctx); 149 PLogObjectParent(mat,baij->lvec); 150 PLogObjectParent(mat,from); 151 PLogObjectParent(mat,to); 152 baij->garray = garray; 153 PLogObjectMemory(mat,(ec+1)*sizeof(int)); 154 ierr = ISDestroy(from);CHKERRQ(ierr); 155 ierr = ISDestroy(to);CHKERRQ(ierr); 156 ierr = VecDestroy(gvec);CHKERRQ(ierr); 157 ierr = PetscFree(tmp);CHKERRQ(ierr); 158 PetscFunctionReturn(0); 159 } 160 161 162 /* 163 Takes the local part of an already assembled MPIBAIJ matrix 164 and disassembles it. This is to allow new nonzeros into the matrix 165 that require more communication in the matrix vector multiply. 166 Thus certain data-structures must be rebuilt. 167 168 Kind of slow! But that's what application programmers get when 169 they are sloppy. 170 */ 171 #undef __FUNC__ 172 #define __FUNC__ "DisAssemble_MPIBAIJ" 173 int DisAssemble_MPIBAIJ(Mat A) 174 { 175 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 176 Mat B = baij->B,Bnew; 177 Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 178 int ierr,i,j,mbs=Bbaij->mbs,n = baij->N,col,*garray=baij->garray; 179 int k,bs=baij->bs,bs2=baij->bs2,*rvals,*nz,ec,m=Bbaij->m; 180 Scalar *a=Bbaij->a; 181 182 PetscFunctionBegin; 183 /* free stuff related to matrix-vec multiply */ 184 ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PLogObjectMemory below */ 185 ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0; 186 ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0; 187 if (baij->colmap) { 188 #if defined (PETSC_USE_CTABLE) 189 TableDelete(baij->colmap); baij->colmap = 0; 190 #else 191 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 192 baij->colmap = 0; 193 PLogObjectMemory(A,-Bbaij->nbs*sizeof(int)); 194 #endif 195 } 196 197 /* make sure that B is assembled so we can access its values */ 198 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 199 MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 200 201 /* invent new B and copy stuff over */ 202 nz = (int *) PetscMalloc( mbs*sizeof(int) );CHKPTRQ(nz); 203 for ( i=0; i<mbs; i++ ) { 204 nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 205 } 206 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,baij->bs,m,n,0,nz,&Bnew);CHKERRQ(ierr); 207 ierr = PetscFree(nz);CHKERRQ(ierr); 208 209 rvals = (int *) PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals); 210 for ( i=0; i<mbs; i++ ) { 211 rvals[0] = bs*i; 212 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 213 for ( j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++ ) { 214 col = garray[Bbaij->j[j]]*bs; 215 for (k=0; k<bs; k++ ) { 216 ierr = MatSetValues(Bnew,bs,rvals,1,&col,a+j*bs2,B->insertmode);CHKERRQ(ierr); 217 col++; 218 } 219 } 220 } 221 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 222 baij->garray = 0; 223 ierr = PetscFree(rvals);CHKERRQ(ierr); 224 PLogObjectMemory(A,-ec*sizeof(int)); 225 ierr = MatDestroy(B);CHKERRQ(ierr); 226 PLogObjectParent(A,Bnew); 227 baij->B = Bnew; 228 A->was_assembled = PETSC_FALSE; 229 PetscFunctionReturn(0); 230 } 231 232 233