1 #ifdef PETSC_RCS_HEADER 2 static char vcid[] = "$Id: mmbaij.c,v 1.26 1999/06/30 23:51:57 balay Exp bsmith $"; 3 #endif 4 5 /* 6 Support for the parallel BAIJ matrix vector multiply 7 */ 8 #include "src/mat/impls/baij/mpi/mpibaij.h" 9 #include "src/vec/vecimpl.h" 10 11 #undef __FUNC__ 12 #define __FUNC__ "MatSetUpMultiply_MPIBAIJ" 13 int MatSetUpMultiply_MPIBAIJ(Mat mat) 14 { 15 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) mat->data; 16 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *) (baij->B->data); 17 int Nbs = baij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray; 18 int col,bs = baij->bs,*tmp,*stmp; 19 IS from,to; 20 Vec gvec; 21 #if defined (PETSC_USE_CTABLE) 22 PetscTable gid1_lid1; 23 PetscTablePosition tpos; 24 int gid, lid; 25 #endif 26 27 PetscFunctionBegin; 28 29 #if defined (PETSC_USE_CTABLE) 30 /* use a table - Mark Adams */ 31 PetscTableCreate(B->mbs,&gid1_lid1); 32 for ( i=0; i<B->mbs; i++ ) { 33 for ( j=0; j<B->ilen[i]; j++ ) { 34 int data,gid1 = aj[B->i[i]+j] + 1; 35 ierr = PetscTableFind(gid1_lid1,gid1,&data) ;CHKERRQ(ierr); 36 if (!data) { 37 /* one based table */ 38 ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr); 39 } 40 } 41 } 42 /* form array of columns we need */ 43 garray = (int *)PetscMalloc((ec+1)*sizeof(int));CHKPTRQ(garray); 44 tmp = (int *)PetscMalloc((ec*bs+1)*sizeof(int));CHKPTRQ(tmp); 45 ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 46 while (tpos) { 47 ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 48 gid--; lid--; 49 garray[lid] = gid; 50 } 51 ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); 52 /* qsort( garray, ec, sizeof(int), intcomparcarc ); */ 53 ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 54 for ( i=0; i<ec; i++ ) { 55 ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr); 56 } 57 /* compact out the extra columns in B */ 58 for ( i=0; i<B->mbs; i++ ) { 59 for ( j=0; j<B->ilen[i]; j++ ) { 60 int gid1 = aj[B->i[i] + j] + 1; 61 ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 62 lid --; 63 aj[B->i[i]+j] = lid; 64 } 65 } 66 B->nbs = ec; 67 B->n = ec*B->bs; 68 ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr); 69 /* Mark Adams */ 70 #else 71 /* For the first stab we make an array as long as the number of columns */ 72 /* mark those columns that are in baij->B */ 73 indices = (int *) PetscMalloc( (Nbs+1)*sizeof(int) );CHKPTRQ(indices); 74 ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr); 75 for ( i=0; i<B->mbs; i++ ) { 76 for ( j=0; j<B->ilen[i]; j++ ) { 77 if (!indices[aj[B->i[i] + j]]) ec++; 78 indices[aj[B->i[i] + j] ] = 1; 79 } 80 } 81 82 /* form array of columns we need */ 83 garray = (int *) PetscMalloc( (ec+1)*sizeof(int) );CHKPTRQ(garray); 84 tmp = (int *) PetscMalloc( (ec*bs+1)*sizeof(int) );CHKPTRQ(tmp) 85 ec = 0; 86 for ( i=0; i<Nbs; i++ ) { 87 if (indices[i]) { 88 garray[ec++] = i; 89 } 90 } 91 92 /* make indices now point into garray */ 93 for ( i=0; i<ec; i++ ) { 94 indices[garray[i]] = i; 95 } 96 97 /* compact out the extra columns in B */ 98 for ( i=0; i<B->mbs; i++ ) { 99 for ( j=0; j<B->ilen[i]; j++ ) { 100 aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 101 } 102 } 103 B->nbs = ec; 104 B->n = ec*B->bs; 105 ierr = PetscFree(indices);CHKERRQ(ierr); 106 #endif 107 108 for ( i=0,col=0; i<ec; i++ ) { 109 for ( j=0; j<bs; j++,col++) tmp[col] = garray[i]*bs+j; 110 } 111 /* create local vector that is used to scatter into */ 112 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr); 113 114 /* create two temporary index sets for building scatter-gather */ 115 116 /* ierr = ISCreateGeneral(PETSC_COMM_SELF,ec*bs,tmp,&from);CHKERRQ(ierr); */ 117 for ( i=0,col=0; i<ec; i++ ) { 118 garray[i] = bs*garray[i]; 119 } 120 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr); 121 for ( i=0,col=0; i<ec; i++ ) { 122 garray[i] = garray[i]/bs; 123 } 124 125 stmp = (int *) PetscMalloc( (ec+1)*sizeof(int) );CHKPTRQ(stmp); 126 for ( i=0; i<ec; i++ ) { stmp[i] = bs*i; } 127 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr); 128 ierr = PetscFree(stmp);CHKERRQ(ierr); 129 130 /* create temporary global vector to generate scatter context */ 131 /* this is inefficient, but otherwise we must do either 132 1) save garray until the first actual scatter when the vector is known or 133 2) have another way of generating a scatter context without a vector.*/ 134 ierr = VecCreateMPI(mat->comm,baij->n,baij->N,&gvec);CHKERRQ(ierr); 135 136 /* gnerate the scatter context */ 137 ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr); 138 139 /* 140 Post the receives for the first matrix vector product. We sync-chronize after 141 this on the chance that the user immediately calls MatMult() after assemblying 142 the matrix. 143 */ 144 ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 145 ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr); 146 147 PLogObjectParent(mat,baij->Mvctx); 148 PLogObjectParent(mat,baij->lvec); 149 PLogObjectParent(mat,from); 150 PLogObjectParent(mat,to); 151 baij->garray = garray; 152 PLogObjectMemory(mat,(ec+1)*sizeof(int)); 153 ierr = ISDestroy(from);CHKERRQ(ierr); 154 ierr = ISDestroy(to);CHKERRQ(ierr); 155 ierr = VecDestroy(gvec);CHKERRQ(ierr); 156 ierr = PetscFree(tmp);CHKERRQ(ierr); 157 PetscFunctionReturn(0); 158 } 159 160 161 /* 162 Takes the local part of an already assembled MPIBAIJ matrix 163 and disassembles it. This is to allow new nonzeros into the matrix 164 that require more communication in the matrix vector multiply. 165 Thus certain data-structures must be rebuilt. 166 167 Kind of slow! But that's what application programmers get when 168 they are sloppy. 169 */ 170 #undef __FUNC__ 171 #define __FUNC__ "DisAssemble_MPIBAIJ" 172 int DisAssemble_MPIBAIJ(Mat A) 173 { 174 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *) A->data; 175 Mat B = baij->B,Bnew; 176 Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 177 int ierr,i,j,mbs=Bbaij->mbs,n = baij->N,col,*garray=baij->garray; 178 int k,bs=baij->bs,bs2=baij->bs2,*rvals,*nz,ec,m=Bbaij->m; 179 Scalar *a=Bbaij->a; 180 181 PetscFunctionBegin; 182 /* free stuff related to matrix-vec multiply */ 183 ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PLogObjectMemory below */ 184 ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0; 185 ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0; 186 if (baij->colmap) { 187 #if defined (PETSC_USE_CTABLE) 188 ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr); 189 #else 190 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 191 baij->colmap = 0; 192 PLogObjectMemory(A,-Bbaij->nbs*sizeof(int)); 193 #endif 194 } 195 196 /* make sure that B is assembled so we can access its values */ 197 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 198 MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 199 200 /* invent new B and copy stuff over */ 201 nz = (int *) PetscMalloc( mbs*sizeof(int) );CHKPTRQ(nz); 202 for ( i=0; i<mbs; i++ ) { 203 nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 204 } 205 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,baij->bs,m,n,0,nz,&Bnew);CHKERRQ(ierr); 206 ierr = PetscFree(nz);CHKERRQ(ierr); 207 208 rvals = (int *) PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals); 209 for ( i=0; i<mbs; i++ ) { 210 rvals[0] = bs*i; 211 for ( j=1; j<bs; j++ ) { rvals[j] = rvals[j-1] + 1; } 212 for ( j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++ ) { 213 col = garray[Bbaij->j[j]]*bs; 214 for (k=0; k<bs; k++ ) { 215 ierr = MatSetValues(Bnew,bs,rvals,1,&col,a+j*bs2,B->insertmode);CHKERRQ(ierr); 216 col++; 217 } 218 } 219 } 220 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 221 baij->garray = 0; 222 ierr = PetscFree(rvals);CHKERRQ(ierr); 223 PLogObjectMemory(A,-ec*sizeof(int)); 224 ierr = MatDestroy(B);CHKERRQ(ierr); 225 PLogObjectParent(A,Bnew); 226 baij->B = Bnew; 227 A->was_assembled = PETSC_FALSE; 228 PetscFunctionReturn(0); 229 } 230 231 232