1 /*$Id: mmsbaij.c,v 1.5 2001/01/16 18:18:09 balay Exp bsmith $*/ 2 3 /* 4 Support for the parallel SBAIJ matrix vector multiply 5 */ 6 #include "src/mat/impls/baij/mpi/mpibaij.h" 7 #include "src/vec/vecimpl.h" 8 extern int MatSetValues_SeqSBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 9 10 #undef __FUNC__ 11 #define __FUNC__ "MatSetUpMultiply_MPISBAIJ" 12 int MatSetUpMultiply_MPISBAIJ(Mat mat) 13 { 14 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 15 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(baij->B->data); 16 int Nbs = baij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray; 17 int col,bs = baij->bs,*tmp,*stmp; 18 IS from,to; 19 Vec gvec; 20 #if defined (PETSC_USE_CTABLE) 21 PetscTable gid1_lid1; 22 PetscTablePosition tpos; 23 int gid,lid; 24 #endif 25 26 PetscFunctionBegin; 27 28 #if defined (PETSC_USE_CTABLE) 29 /* use a table - Mark Adams */ 30 PetscTableCreate(B->mbs,&gid1_lid1); 31 for (i=0; i<B->mbs; i++) { 32 for (j=0; j<B->ilen[i]; j++) { 33 int data,gid1 = aj[B->i[i]+j] + 1; 34 ierr = PetscTableFind(gid1_lid1,gid1,&data) ;CHKERRQ(ierr); 35 if (!data) { 36 /* one based table */ 37 ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr); 38 } 39 } 40 } 41 /* form array of columns we need */ 42 ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr); 43 ierr = PetscMalloc((ec*bs+1)*sizeof(int),&tmp);CHKERRQ(ierr); 44 ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 45 while (tpos) { 46 ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 47 gid--; lid--; 48 garray[lid] = gid; 49 } 50 ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); 51 /* qsort(garray, ec, sizeof(int), intcomparcarc); */ 52 ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 53 for (i=0; i<ec; i++) { 54 ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr); 55 } 56 /* compact out the extra columns in B */ 57 for (i=0; i<B->mbs; i++) { 58 for (j=0; j<B->ilen[i]; j++) { 59 int gid1 = aj[B->i[i] + j] + 1; 60 ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 61 lid --; 62 aj[B->i[i]+j] = lid; 63 } 64 } 65 B->nbs = ec; 66 baij->B->n = ec*B->bs; 67 ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr); 68 /* Mark Adams */ 69 #else 70 /* For the first stab we make an array as long as the number of columns */ 71 /* mark those columns that are in baij->B */ 72 ierr = PetscMalloc((Nbs+1)*sizeof(int),&indices);CHKERRQ(ierr); 73 ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr); 74 for (i=0; i<B->mbs; i++) { 75 for (j=0; j<B->ilen[i]; j++) { 76 if (!indices[aj[B->i[i] + j]]) ec++; 77 indices[aj[B->i[i] + j] ] = 1; 78 } 79 } 80 81 /* form array of columns we need */ 82 ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr); 83 ierr = PetscMalloc((ec*bs+1)*sizeof(int),&tmp);CHKERRQ(ierr); 84 ec = 0; 85 for (i=0; i<Nbs; i++) { 86 if (indices[i]) { 87 garray[ec++] = i; 88 } 89 } 90 91 /* make indices now point into garray */ 92 for (i=0; i<ec; i++) { 93 indices[garray[i]] = i; 94 } 95 96 /* compact out the extra columns in B */ 97 for (i=0; i<B->mbs; i++) { 98 for (j=0; j<B->ilen[i]; j++) { 99 aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 100 } 101 } 102 B->nbs = ec; 103 baij->B->n = ec*B->bs; 104 ierr = PetscFree(indices);CHKERRQ(ierr); 105 #endif 106 107 for (i=0,col=0; i<ec; i++) { 108 for (j=0; j<bs; j++,col++) tmp[col] = garray[i]*bs+j; 109 } 110 /* create local vector that is used to scatter into */ 111 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr); 112 113 /* create two temporary index sets for building scatter-gather */ 114 115 /* ierr = ISCreateGeneral(PETSC_COMM_SELF,ec*bs,tmp,&from);CHKERRQ(ierr); */ 116 for (i=0,col=0; i<ec; i++) { 117 garray[i] = bs*garray[i]; 118 } 119 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr); 120 for (i=0,col=0; i<ec; i++) { 121 garray[i] = garray[i]/bs; 122 } 123 124 ierr = PetscMalloc((ec+1)*sizeof(int),&stmp);CHKERRQ(ierr); 125 for (i=0; i<ec; i++) { stmp[i] = bs*i; } 126 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr); 127 ierr = PetscFree(stmp);CHKERRQ(ierr); 128 129 /* create temporary global vector to generate scatter context */ 130 /* this is inefficient, but otherwise we must do either 131 1) save garray until the first actual scatter when the vector is known or 132 2) have another way of generating a scatter context without a vector.*/ 133 ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr); 134 135 /* gnerate the scatter context */ 136 ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr); 137 138 /* 139 Post the receives for the first matrix vector product. We sync-chronize after 140 this on the chance that the user immediately calls MatMult() after assemblying 141 the matrix. 142 */ 143 ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 144 ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr); 145 146 PetscLogObjectParent(mat,baij->Mvctx); 147 PetscLogObjectParent(mat,baij->lvec); 148 PetscLogObjectParent(mat,from); 149 PetscLogObjectParent(mat,to); 150 baij->garray = garray; 151 PetscLogObjectMemory(mat,(ec+1)*sizeof(int)); 152 ierr = ISDestroy(from);CHKERRQ(ierr); 153 ierr = ISDestroy(to);CHKERRQ(ierr); 154 ierr = VecDestroy(gvec);CHKERRQ(ierr); 155 ierr = PetscFree(tmp);CHKERRQ(ierr); 156 PetscFunctionReturn(0); 157 } 158 159 160 /* 161 Takes the local part of an already assembled MPIBAIJ matrix 162 and disassembles it. This is to allow new nonzeros into the matrix 163 that require more communication in the matrix vector multiply. 164 Thus certain data-structures must be rebuilt. 165 166 Kind of slow! But that's what application programmers get when 167 they are sloppy. 168 */ 169 #undef __FUNC__ 170 #define __FUNC__ "DisAssemble_MPISBAIJ" 171 int DisAssemble_MPISBAIJ(Mat A) 172 { 173 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 174 Mat B = baij->B,Bnew; 175 Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 176 int ierr,i,j,mbs=Bbaij->mbs,n = A->N,col,*garray=baij->garray; 177 int k,bs=baij->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->m; 178 MatScalar *a = Bbaij->a; 179 Scalar *atmp; 180 #if defined(PETSC_USE_MAT_SINGLE) 181 int l; 182 #endif 183 184 PetscFunctionBegin; 185 #if defined(PETSC_USE_MAT_SINGLE) 186 ierr = PetscMalloc(baij->bs*sizeof(Scalar),&atmp); 187 #endif 188 /* free stuff related to matrix-vec multiply */ 189 ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 190 ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0; 191 ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0; 192 if (baij->colmap) { 193 #if defined (PETSC_USE_CTABLE) 194 ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr); 195 #else 196 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 197 baij->colmap = 0; 198 PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(int)); 199 #endif 200 } 201 202 /* make sure that B is assembled so we can access its values */ 203 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 204 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 205 206 /* invent new B and copy stuff over */ 207 ierr = PetscMalloc(mbs*sizeof(int),&nz);CHKERRQ(ierr); 208 for (i=0; i<mbs; i++) { 209 nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 210 } 211 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,baij->bs,m,n,0,nz,&Bnew);CHKERRQ(ierr); 212 ierr = PetscFree(nz);CHKERRQ(ierr); 213 214 ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr); 215 for (i=0; i<mbs; i++) { 216 rvals[0] = bs*i; 217 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 218 for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) { 219 col = garray[Bbaij->j[j]]*bs; 220 for (k=0; k<bs; k++) { 221 #if defined(PETSC_USE_MAT_SINGLE) 222 for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l]; 223 #else 224 atmp = a+j*bs2; 225 #endif 226 ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr); 227 col++; 228 } 229 } 230 } 231 #if defined(PETSC_USE_MAT_SINGLE) 232 ierr = PetscFree(atmp);CHKERRQ(ierr); 233 #endif 234 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 235 baij->garray = 0; 236 ierr = PetscFree(rvals);CHKERRQ(ierr); 237 PetscLogObjectMemory(A,-ec*sizeof(int)); 238 ierr = MatDestroy(B);CHKERRQ(ierr); 239 PetscLogObjectParent(A,Bnew); 240 baij->B = Bnew; 241 A->was_assembled = PETSC_FALSE; 242 PetscFunctionReturn(0); 243 } 244 245 246 247