1 /*$Id: mmsbaij.c,v 1.10 2001/08/07 03:03:05 balay Exp $*/ 2 3 /* 4 Support for the parallel SBAIJ matrix vector multiply 5 */ 6 #include "src/mat/impls/sbaij/mpi/mpisbaij.h" 7 #include "src/vec/vecimpl.h" 8 extern int MatSetValues_SeqSBAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode); 9 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ" 12 int MatSetUpMultiply_MPISBAIJ(Mat mat) 13 { 14 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 15 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(baij->B->data); 16 int Nbs = baij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray; 17 int bs = baij->bs,*stmp; 18 IS from,to; 19 Vec gvec; 20 #if defined (PETSC_USE_CTABLE) 21 PetscTable gid1_lid1; 22 PetscTablePosition tpos; 23 int gid,lid; 24 #endif 25 26 PetscFunctionBegin; 27 28 #if defined (PETSC_USE_CTABLE) 29 /* use a table - Mark Adams */ 30 PetscTableCreate(B->mbs,&gid1_lid1); 31 for (i=0; i<B->mbs; i++) { 32 for (j=0; j<B->ilen[i]; j++) { 33 int data,gid1 = aj[B->i[i]+j] + 1; 34 ierr = PetscTableFind(gid1_lid1,gid1,&data) ;CHKERRQ(ierr); 35 if (!data) { 36 /* one based table */ 37 ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr); 38 } 39 } 40 } 41 /* form array of columns we need */ 42 ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr); 43 /* ierr = PetscMalloc((ec*bs+1)*sizeof(int),&tmp);CHKERRQ(ierr); */ 44 ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 45 while (tpos) { 46 ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 47 gid--; lid--; 48 garray[lid] = gid; 49 } 50 ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); 51 /* qsort(garray, ec, sizeof(int), intcomparcarc); */ 52 ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 53 for (i=0; i<ec; i++) { 54 ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr); 55 } 56 /* compact out the extra columns in B */ 57 for (i=0; i<B->mbs; i++) { 58 for (j=0; j<B->ilen[i]; j++) { 59 int gid1 = aj[B->i[i] + j] + 1; 60 ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 61 lid --; 62 aj[B->i[i]+j] = lid; 63 } 64 } 65 B->nbs = ec; 66 baij->B->n = ec*B->bs; 67 ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr); 68 /* Mark Adams */ 69 #else 70 /* For the first stab we make an array as long as the number of columns */ 71 /* mark those columns that are in baij->B */ 72 ierr = PetscMalloc((Nbs+1)*sizeof(int),&indices);CHKERRQ(ierr); 73 ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr); 74 for (i=0; i<B->mbs; i++) { 75 for (j=0; j<B->ilen[i]; j++) { 76 if (!indices[aj[B->i[i] + j]]) ec++; 77 indices[aj[B->i[i] + j] ] = 1; 78 } 79 } 80 81 /* form array of columns we need */ 82 ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr); 83 /* ierr = PetscMalloc((ec*bs+1)*sizeof(int),&tmp);CHKERRQ(ierr); */ 84 ec = 0; 85 for (i=0; i<Nbs; i++) { 86 if (indices[i]) { 87 garray[ec++] = i; 88 } 89 } 90 91 /* make indices now point into garray */ 92 for (i=0; i<ec; i++) { 93 indices[garray[i]] = i; 94 } 95 96 /* compact out the extra columns in B */ 97 for (i=0; i<B->mbs; i++) { 98 for (j=0; j<B->ilen[i]; j++) { 99 aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 100 } 101 } 102 B->nbs = ec; 103 baij->B->n = ec*B->bs; 104 ierr = PetscFree(indices);CHKERRQ(ierr); 105 #endif 106 /* 107 for (i=0,col=0; i<ec; i++) { 108 for (j=0; j<bs; j++,col++) tmp[col] = garray[i]*bs+j; 109 } 110 */ 111 /* create local vector that is used to scatter into */ 112 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr); 113 114 /* create two temporary index sets for building scatter-gather */ 115 116 /* ierr = ISCreateGeneral(PETSC_COMM_SELF,ec*bs,tmp,&from);CHKERRQ(ierr); */ 117 for (i=0; i<ec; i++) { 118 garray[i] = bs*garray[i]; 119 } 120 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr); 121 for (i=0; i<ec; i++) { 122 garray[i] = garray[i]/bs; 123 } 124 125 ierr = PetscMalloc((ec+1)*sizeof(int),&stmp);CHKERRQ(ierr); 126 for (i=0; i<ec; i++) { stmp[i] = bs*i; } 127 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr); 128 ierr = PetscFree(stmp);CHKERRQ(ierr); 129 130 /* create temporary global vector to generate scatter context */ 131 /* this is inefficient, but otherwise we must do either 132 1) save garray until the first actual scatter when the vector is known or 133 2) have another way of generating a scatter context without a vector.*/ 134 ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr); 135 136 /* gnerate the scatter context */ 137 ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr); 138 139 /* 140 Post the receives for the first matrix vector product. We sync-chronize after 141 this on the chance that the user immediately calls MatMult() after assemblying 142 the matrix. 143 */ 144 ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 145 ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr); 146 147 PetscLogObjectParent(mat,baij->Mvctx); 148 PetscLogObjectParent(mat,baij->lvec); 149 PetscLogObjectParent(mat,from); 150 PetscLogObjectParent(mat,to); 151 baij->garray = garray; 152 PetscLogObjectMemory(mat,(ec+1)*sizeof(int)); 153 ierr = ISDestroy(from);CHKERRQ(ierr); 154 ierr = ISDestroy(to);CHKERRQ(ierr); 155 ierr = VecDestroy(gvec);CHKERRQ(ierr); 156 /* ierr = PetscFree(tmp);CHKERRQ(ierr); */ 157 PetscFunctionReturn(0); 158 } 159 160 161 /* 162 Takes the local part of an already assembled MPIBAIJ matrix 163 and disassembles it. This is to allow new nonzeros into the matrix 164 that require more communication in the matrix vector multiply. 165 Thus certain data-structures must be rebuilt. 166 167 Kind of slow! But that's what application programmers get when 168 they are sloppy. 169 */ 170 #undef __FUNCT__ 171 #define __FUNCT__ "DisAssemble_MPISBAIJ" 172 int DisAssemble_MPISBAIJ(Mat A) 173 { 174 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)A->data; 175 Mat B = baij->B,Bnew; 176 Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 177 int ierr,i,j,mbs=Bbaij->mbs,n = A->N,col,*garray=baij->garray; 178 int k,bs=baij->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->m; 179 MatScalar *a = Bbaij->a; 180 PetscScalar *atmp; 181 #if defined(PETSC_USE_MAT_SINGLE) 182 int l; 183 #endif 184 185 PetscFunctionBegin; 186 #if defined(PETSC_USE_MAT_SINGLE) 187 ierr = PetscMalloc(baij->bs*sizeof(PetscScalar),&atmp); 188 #endif 189 /* free stuff related to matrix-vec multiply */ 190 ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 191 ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0; 192 ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0; 193 if (baij->colmap) { 194 #if defined (PETSC_USE_CTABLE) 195 ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr); 196 #else 197 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 198 baij->colmap = 0; 199 PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(int)); 200 #endif 201 } 202 203 /* make sure that B is assembled so we can access its values */ 204 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 205 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 206 207 /* invent new B and copy stuff over */ 208 ierr = PetscMalloc(mbs*sizeof(int),&nz);CHKERRQ(ierr); 209 for (i=0; i<mbs; i++) { 210 nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 211 } 212 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,baij->bs,m,n,0,nz,&Bnew);CHKERRQ(ierr); 213 ierr = PetscFree(nz);CHKERRQ(ierr); 214 215 ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr); 216 for (i=0; i<mbs; i++) { 217 rvals[0] = bs*i; 218 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 219 for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) { 220 col = garray[Bbaij->j[j]]*bs; 221 for (k=0; k<bs; k++) { 222 #if defined(PETSC_USE_MAT_SINGLE) 223 for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l]; 224 #else 225 atmp = a+j*bs2 + k*bs; 226 #endif 227 ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr); 228 col++; 229 } 230 } 231 } 232 #if defined(PETSC_USE_MAT_SINGLE) 233 ierr = PetscFree(atmp);CHKERRQ(ierr); 234 #endif 235 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 236 baij->garray = 0; 237 ierr = PetscFree(rvals);CHKERRQ(ierr); 238 PetscLogObjectMemory(A,-ec*sizeof(int)); 239 ierr = MatDestroy(B);CHKERRQ(ierr); 240 PetscLogObjectParent(A,Bnew); 241 baij->B = Bnew; 242 A->was_assembled = PETSC_FALSE; 243 PetscFunctionReturn(0); 244 } 245 246 247 248