1 /*$Id: mmbaij.c,v 1.30 2000/04/09 03:09:57 bsmith Exp bsmith $*/ 2 3 /* 4 Support for the parallel BAIJ matrix vector multiply 5 */ 6 #include "src/mat/impls/baij/mpi/mpibaij.h" 7 #include "src/vec/vecimpl.h" 8 9 #undef __FUNC__ 10 #define __FUNC__ /*<a name=""></a>*/"MatSetUpMultiply_MPIBAIJ" 11 int MatSetUpMultiply_MPIBAIJ(Mat mat) 12 { 13 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 14 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(baij->B->data); 15 int Nbs = baij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray; 16 int col,bs = baij->bs,*tmp,*stmp; 17 IS from,to; 18 Vec gvec; 19 #if defined (PETSC_USE_CTABLE) 20 PetscTable gid1_lid1; 21 PetscTablePosition tpos; 22 int gid,lid; 23 #endif 24 25 PetscFunctionBegin; 26 27 #if defined (PETSC_USE_CTABLE) 28 /* use a table - Mark Adams */ 29 PetscTableCreate(B->mbs,&gid1_lid1); 30 for (i=0; i<B->mbs; i++) { 31 for (j=0; j<B->ilen[i]; j++) { 32 int data,gid1 = aj[B->i[i]+j] + 1; 33 ierr = PetscTableFind(gid1_lid1,gid1,&data) ;CHKERRQ(ierr); 34 if (!data) { 35 /* one based table */ 36 ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr); 37 } 38 } 39 } 40 /* form array of columns we need */ 41 garray = (int *)PetscMalloc((ec+1)*sizeof(int));CHKPTRQ(garray); 42 tmp = (int *)PetscMalloc((ec*bs+1)*sizeof(int));CHKPTRQ(tmp); 43 ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 44 while (tpos) { 45 ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 46 gid--; lid--; 47 garray[lid] = gid; 48 } 49 ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); 50 /* qsort(garray, ec, sizeof(int), intcomparcarc); */ 51 ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 52 for (i=0; i<ec; i++) { 53 ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr); 54 } 55 /* compact out the extra columns in B */ 56 for (i=0; i<B->mbs; i++) { 57 for (j=0; j<B->ilen[i]; j++) { 58 int gid1 = aj[B->i[i] + j] + 1; 59 ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 60 lid --; 61 aj[B->i[i]+j] = lid; 62 } 63 } 64 B->nbs = ec; 65 B->n = ec*B->bs; 66 ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr); 67 /* Mark Adams */ 68 #else 69 /* For the first stab we make an array as long as the number of columns */ 70 /* mark those columns that are in baij->B */ 71 indices = (int*)PetscMalloc((Nbs+1)*sizeof(int));CHKPTRQ(indices); 72 ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr); 73 for (i=0; i<B->mbs; i++) { 74 for (j=0; j<B->ilen[i]; j++) { 75 if (!indices[aj[B->i[i] + j]]) ec++; 76 indices[aj[B->i[i] + j] ] = 1; 77 } 78 } 79 80 /* form array of columns we need */ 81 garray = (int*)PetscMalloc((ec+1)*sizeof(int));CHKPTRQ(garray); 82 tmp = (int*)PetscMalloc((ec*bs+1)*sizeof(int));CHKPTRQ(tmp) 83 ec = 0; 84 for (i=0; i<Nbs; i++) { 85 if (indices[i]) { 86 garray[ec++] = i; 87 } 88 } 89 90 /* make indices now point into garray */ 91 for (i=0; i<ec; i++) { 92 indices[garray[i]] = i; 93 } 94 95 /* compact out the extra columns in B */ 96 for (i=0; i<B->mbs; i++) { 97 for (j=0; j<B->ilen[i]; j++) { 98 aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 99 } 100 } 101 B->nbs = ec; 102 B->n = ec*B->bs; 103 ierr = PetscFree(indices);CHKERRQ(ierr); 104 #endif 105 106 for (i=0,col=0; i<ec; i++) { 107 for (j=0; j<bs; j++,col++) tmp[col] = garray[i]*bs+j; 108 } 109 /* create local vector that is used to scatter into */ 110 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr); 111 112 /* create two temporary index sets for building scatter-gather */ 113 114 /* ierr = ISCreateGeneral(PETSC_COMM_SELF,ec*bs,tmp,&from);CHKERRQ(ierr); */ 115 for (i=0,col=0; i<ec; i++) { 116 garray[i] = bs*garray[i]; 117 } 118 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr); 119 for (i=0,col=0; i<ec; i++) { 120 garray[i] = garray[i]/bs; 121 } 122 123 stmp = (int*)PetscMalloc((ec+1)*sizeof(int));CHKPTRQ(stmp); 124 for (i=0; i<ec; i++) { stmp[i] = bs*i; } 125 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr); 126 ierr = PetscFree(stmp);CHKERRQ(ierr); 127 128 /* create temporary global vector to generate scatter context */ 129 /* this is inefficient, but otherwise we must do either 130 1) save garray until the first actual scatter when the vector is known or 131 2) have another way of generating a scatter context without a vector.*/ 132 ierr = VecCreateMPI(mat->comm,baij->n,baij->N,&gvec);CHKERRQ(ierr); 133 134 /* gnerate the scatter context */ 135 ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr); 136 137 /* 138 Post the receives for the first matrix vector product. We sync-chronize after 139 this on the chance that the user immediately calls MatMult() after assemblying 140 the matrix. 141 */ 142 ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 143 ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr); 144 145 PLogObjectParent(mat,baij->Mvctx); 146 PLogObjectParent(mat,baij->lvec); 147 PLogObjectParent(mat,from); 148 PLogObjectParent(mat,to); 149 baij->garray = garray; 150 PLogObjectMemory(mat,(ec+1)*sizeof(int)); 151 ierr = ISDestroy(from);CHKERRQ(ierr); 152 ierr = ISDestroy(to);CHKERRQ(ierr); 153 ierr = VecDestroy(gvec);CHKERRQ(ierr); 154 ierr = PetscFree(tmp);CHKERRQ(ierr); 155 PetscFunctionReturn(0); 156 } 157 158 159 /* 160 Takes the local part of an already assembled MPIBAIJ matrix 161 and disassembles it. This is to allow new nonzeros into the matrix 162 that require more communication in the matrix vector multiply. 163 Thus certain data-structures must be rebuilt. 164 165 Kind of slow! But that's what application programmers get when 166 they are sloppy. 167 */ 168 #undef __FUNC__ 169 #define __FUNC__ /*<a name=""></a>*/"DisAssemble_MPIBAIJ" 170 int DisAssemble_MPIBAIJ(Mat A) 171 { 172 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 173 Mat B = baij->B,Bnew; 174 Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 175 int ierr,i,j,mbs=Bbaij->mbs,n = baij->N,col,*garray=baij->garray; 176 int k,bs=baij->bs,bs2=baij->bs2,*rvals,*nz,ec,m=Bbaij->m; 177 MatScalar *a = Bbaij->a; 178 #if defined(PETSC_USE_MAT_SINGLE) 179 Scalar *atmp = (Scalar*)PetscMalloc(baij->bs*sizeof(Scalar)); 180 int l; 181 #endif 182 183 PetscFunctionBegin; 184 /* free stuff related to matrix-vec multiply */ 185 ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PLogObjectMemory below */ 186 ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0; 187 ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0; 188 if (baij->colmap) { 189 #if defined (PETSC_USE_CTABLE) 190 ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr); 191 #else 192 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 193 baij->colmap = 0; 194 PLogObjectMemory(A,-Bbaij->nbs*sizeof(int)); 195 #endif 196 } 197 198 /* make sure that B is assembled so we can access its values */ 199 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 200 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 201 202 /* invent new B and copy stuff over */ 203 nz = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(nz); 204 for (i=0; i<mbs; i++) { 205 nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 206 } 207 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,baij->bs,m,n,0,nz,&Bnew);CHKERRQ(ierr); 208 ierr = PetscFree(nz);CHKERRQ(ierr); 209 210 rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals); 211 for (i=0; i<mbs; i++) { 212 rvals[0] = bs*i; 213 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 214 for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) { 215 col = garray[Bbaij->j[j]]*bs; 216 for (k=0; k<bs; k++) { 217 #if defined(PETSC_USE_MAT_SINGLE) 218 for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l]; 219 #else 220 atmp = a+j*bs2; 221 #endif 222 ierr = MatSetValues(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr); 223 col++; 224 } 225 } 226 } 227 #if defined(PETSC_USE_MAT_SINGLE) 228 ierr = PetscFree(atmp);CHKERRQ(ierr); 229 #endif 230 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 231 baij->garray = 0; 232 ierr = PetscFree(rvals);CHKERRQ(ierr); 233 PLogObjectMemory(A,-ec*sizeof(int)); 234 ierr = MatDestroy(B);CHKERRQ(ierr); 235 PLogObjectParent(A,Bnew); 236 baij->B = Bnew; 237 A->was_assembled = PETSC_FALSE; 238 PetscFunctionReturn(0); 239 } 240 241 242 243