1 #ifdef PETSC_RCS_HEADER 2 static char vcid[] = "$Id: mmbaij.c,v 1.18 1999/01/08 18:18:13 balay Exp balay $"; 3 #endif 4 5 6 /* 7 Support for the parallel BAIJ matrix vector multiply 8 */ 9 #include "src/mat/impls/baij/mpi/mpibaij.h" 10 #include "src/vec/vecimpl.h" 11 12 #undef __FUNC__ 13 #define __FUNC__ "MatSetUpMultiply_MPIBAIJ" 14 int MatSetUpMultiply_MPIBAIJ(Mat mat) 15 { 16 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 17 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *) (baij->B->data); 18 int Nbs = baij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray; 19 int col,bs = baij->bs,*tmp,*stmp; 20 IS from,to; 21 Vec gvec; 22 #if defined (USE_CTABLE) 23 Table gid1_lid1; 24 CTablePos tpos; 25 int gid, lid; 26 #endif 27 28 PetscFunctionBegin; 29 30 #if defined (USE_CTABLE) 31 /* use a table - Mark Adams */ 32 TableCreate( &gid1_lid1, B->mbs ); 33 for ( i=0; i<B->mbs; i++ ) { 34 for ( j=0; j<B->ilen[i]; j++ ) { 35 int gid1 = aj[B->i[i] + j] + 1; 36 if ( !TableFind( gid1_lid1, gid1 ) ){ 37 /* one based table */ 38 ierr = TableAdd( gid1_lid1, gid1, ++ec ); CHKERRQ(ierr); 39 } 40 } 41 } 42 /* form array of columns we need */ 43 garray = (int *) PetscMalloc( (ec+1)*sizeof(int) ); CHKPTRQ(garray); 44 tmp = (int *) PetscMalloc( (ec*bs+1)*sizeof(int) ); CHKPTRQ(tmp); 45 ierr = TableGetHeadPosition( gid1_lid1, &tpos ); CHKERRQ(ierr); 46 while( tpos ) { 47 ierr = TableGetNext( gid1_lid1, &tpos, &gid, &lid ); CHKERRQ(ierr); 48 gid--; lid--; 49 garray[lid] = gid; 50 } 51 qsort( garray, ec, sizeof(int), intcomparc ); /* sort */ 52 TableRemoveAll( gid1_lid1 ); 53 for ( i=0; i<ec; i++ ) { 54 ierr = TableAdd( gid1_lid1, garray[i] + 1, i + 1 ); CHKERRQ(ierr); 55 } 56 /* compact out the extra columns in B */ 57 for ( i=0; i<B->mbs; i++ ) { 58 for ( j=0; j<B->ilen[i]; j++ ) { 59 int gid1 = aj[B->i[i] + j] + 1; 60 lid = TableFind( gid1_lid1, gid1 ) - 1; 61 aj[B->i[i] + j] = lid; 62 } 63 } 64 B->nbs = ec; 65 B->n = ec*B->bs; 66 TableDelete(gid1_lid1); 67 /* Mark Adams */ 68 #else 69 /* For the first stab we make an array as long as the number of columns */ 70 /* mark those columns that are in baij->B */ 71 indices = (int *) PetscMalloc( (Nbs+1)*sizeof(int) ); CHKPTRQ(indices); 72 PetscMemzero(indices,Nbs*sizeof(int)); 73 for ( i=0; i<B->mbs; i++ ) { 74 for ( j=0; j<B->ilen[i]; j++ ) { 75 if (!indices[aj[B->i[i] + j]]) ec++; 76 indices[aj[B->i[i] + j] ] = 1; 77 } 78 } 79 80 /* form array of columns we need */ 81 garray = (int *) PetscMalloc( (ec+1)*sizeof(int) ); CHKPTRQ(garray); 82 tmp = (int *) PetscMalloc( (ec*bs+1)*sizeof(int) ); CHKPTRQ(tmp) 83 ec = 0; 84 for ( i=0; i<Nbs; i++ ) { 85 if (indices[i]) { 86 garray[ec++] = i; 87 } 88 } 89 90 /* make indices now point into garray */ 91 for ( i=0; i<ec; i++ ) { 92 indices[garray[i]] = i; 93 } 94 95 /* compact out the extra columns in B */ 96 for ( i=0; i<B->mbs; i++ ) { 97 for ( j=0; j<B->ilen[i]; j++ ) { 98 aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 99 } 100 } 101 B->nbs = ec; 102 B->n = ec*B->bs; 103 PetscFree(indices); 104 #endif 105 106 for ( i=0,col=0; i<ec; i++ ) { 107 for ( j=0; j<bs; j++,col++) tmp[col] = garray[i]*bs+j; 108 } 109 /* create local vector that is used to scatter into */ 110 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec); CHKERRQ(ierr); 111 112 /* create two temporary index sets for building scatter-gather */ 113 114 /* ierr = ISCreateGeneral(PETSC_COMM_SELF,ec*bs,tmp,&from); CHKERRQ(ierr); */ 115 for ( i=0,col=0; i<ec; i++ ) { 116 garray[i] = bs*garray[i]; 117 } 118 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr); 119 for ( i=0,col=0; i<ec; i++ ) { 120 garray[i] = garray[i]/bs; 121 } 122 123 stmp = (int *) PetscMalloc( (ec+1)*sizeof(int) ); CHKPTRQ(stmp); 124 for ( i=0; i<ec; i++ ) { stmp[i] = bs*i; } 125 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr); 126 PetscFree(stmp); 127 128 /* create temporary global vector to generate scatter context */ 129 /* this is inefficient, but otherwise we must do either 130 1) save garray until the first actual scatter when the vector is known or 131 2) have another way of generating a scatter context without a vector.*/ 132 ierr = VecCreateMPI(mat->comm,baij->n,baij->N,&gvec); CHKERRQ(ierr); 133 134 /* gnerate the scatter context */ 135 ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr); 136 137 /* 138 Post the receives for the first matrix vector product. We sync-chronize after 139 this on the chance that the user immediately calls MatMult() after assemblying 140 the matrix. 141 */ 142 ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 143 ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr); 144 145 PLogObjectParent(mat,baij->Mvctx); 146 PLogObjectParent(mat,baij->lvec); 147 PLogObjectParent(mat,from); 148 PLogObjectParent(mat,to); 149 baij->garray = garray; 150 PLogObjectMemory(mat,(ec+1)*sizeof(int)); 151 ierr = ISDestroy(from); CHKERRQ(ierr); 152 ierr = ISDestroy(to); CHKERRQ(ierr); 153 ierr = VecDestroy(gvec); 154 PetscFree(tmp); 155 PetscFunctionReturn(0); 156 } 157 158 159 /* 160 Takes the local part of an already assembled MPIBAIJ matrix 161 and disassembles it. This is to allow new nonzeros into the matrix 162 that require more communication in the matrix vector multiply. 163 Thus certain data-structures must be rebuilt. 164 165 Kind of slow! But that's what application programmers get when 166 they are sloppy. 167 */ 168 #undef __FUNC__ 169 #define __FUNC__ "DisAssemble_MPIBAIJ" 170 int DisAssemble_MPIBAIJ(Mat A) 171 { 172 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 173 Mat B = baij->B,Bnew; 174 Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 175 int ierr,i,j,mbs=Bbaij->mbs,n = baij->N,col,*garray=baij->garray; 176 int k,bs=baij->bs,bs2=baij->bs2,*rvals,*nz,ec,m=Bbaij->m; 177 Scalar *a=Bbaij->a; 178 179 PetscFunctionBegin; 180 /* free stuff related to matrix-vec multiply */ 181 ierr = VecGetSize(baij->lvec,&ec); /* needed for PLogObjectMemory below */ 182 ierr = VecDestroy(baij->lvec); CHKERRQ(ierr); baij->lvec = 0; 183 ierr = VecScatterDestroy(baij->Mvctx); CHKERRQ(ierr); baij->Mvctx = 0; 184 if (baij->colmap) { 185 #if defined (USE_CTABLE) 186 TableDelete(baij->colmap); baij->colmap = 0; 187 #else 188 PetscFree(baij->colmap); baij->colmap = 0; 189 PLogObjectMemory(A,-Bbaij->nbs*sizeof(int)); 190 #endif 191 } 192 193 /* make sure that B is assembled so we can access its values */ 194 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 195 MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 196 197 /* invent new B and copy stuff over */ 198 nz = (int *) PetscMalloc( mbs*sizeof(int) ); CHKPTRQ(nz); 199 for ( i=0; i<mbs; i++ ) { 200 nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 201 } 202 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,baij->bs,m,n,0,nz,&Bnew); CHKERRQ(ierr); 203 PetscFree(nz); 204 205 rvals = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(rvals); 206 for ( i=0; i<mbs; i++ ) { 207 rvals[0] = bs*i; 208 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 209 for ( j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++ ) { 210 col = garray[Bbaij->j[j]]*bs; 211 for (k=0; k<bs; k++ ) { 212 ierr = MatSetValues(Bnew,bs,rvals,1,&col,a+j*bs2,INSERT_VALUES);CHKERRQ(ierr); 213 col++; 214 } 215 } 216 } 217 PetscFree(baij->garray); baij->garray = 0; 218 PetscFree(rvals); 219 PLogObjectMemory(A,-ec*sizeof(int)); 220 ierr = MatDestroy(B); CHKERRQ(ierr); 221 PLogObjectParent(A,Bnew); 222 baij->B = Bnew; 223 A->was_assembled = PETSC_FALSE; 224 PetscFunctionReturn(0); 225 } 226 227 228