1 /*$Id: mmsbaij.c,v 1.1 2000/07/07 20:55:55 balay Exp balay $*/ 2 3 /* 4 Support for the parallel SBAIJ matrix vector multiply 5 */ 6 #include "src/mat/impls/baij/mpi/mpibaij.h" 7 #include "src/vec/vecimpl.h" 8 extern int MatSetValues_SeqSBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 9 10 #undef __FUNC__ 11 #define __FUNC__ /*<a name=""></a>*/"MatSetUpMultiply_MPISBAIJ" 12 int MatSetUpMultiply_MPISBAIJ(Mat mat) 13 { 14 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 15 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(baij->B->data); 16 int Nbs = baij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray; 17 int col,bs = baij->bs,*tmp,*stmp; 18 IS from,to; 19 Vec gvec; 20 #if defined (PETSC_USE_CTABLE) 21 PetscTable gid1_lid1; 22 PetscTablePosition tpos; 23 int gid,lid; 24 #endif 25 26 PetscFunctionBegin; 27 28 #if defined (PETSC_USE_CTABLE) 29 /* use a table - Mark Adams */ 30 PetscTableCreate(B->mbs,&gid1_lid1); 31 for (i=0; i<B->mbs; i++) { 32 for (j=0; j<B->ilen[i]; j++) { 33 int data,gid1 = aj[B->i[i]+j] + 1; 34 ierr = PetscTableFind(gid1_lid1,gid1,&data) ;CHKERRQ(ierr); 35 if (!data) { 36 /* one based table */ 37 ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr); 38 } 39 } 40 } 41 /* form array of columns we need */ 42 garray = (int *)PetscMalloc((ec+1)*sizeof(int));CHKPTRQ(garray); 43 tmp = (int *)PetscMalloc((ec*bs+1)*sizeof(int));CHKPTRQ(tmp); 44 ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 45 while (tpos) { 46 ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 47 gid--; lid--; 48 garray[lid] = gid; 49 } 50 ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); 51 /* qsort(garray, ec, sizeof(int), intcomparcarc); */ 52 ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 53 for (i=0; i<ec; i++) { 54 ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr); 55 } 56 /* compact out the extra columns in B */ 57 for (i=0; i<B->mbs; i++) { 58 for (j=0; j<B->ilen[i]; j++) { 59 int gid1 = aj[B->i[i] + j] + 1; 60 ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 61 lid --; 62 aj[B->i[i]+j] = lid; 63 } 64 } 65 B->nbs = ec; 66 B->n = ec*B->bs; 67 ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr); 68 /* Mark Adams */ 69 #else 70 /* For the first stab we make an array as long as the number of columns */ 71 /* mark those columns that are in baij->B */ 72 indices = (int*)PetscMalloc((Nbs+1)*sizeof(int));CHKPTRQ(indices); 73 ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr); 74 for (i=0; i<B->mbs; i++) { 75 for (j=0; j<B->ilen[i]; j++) { 76 if (!indices[aj[B->i[i] + j]]) ec++; 77 indices[aj[B->i[i] + j] ] = 1; 78 } 79 } 80 81 /* form array of columns we need */ 82 garray = (int*)PetscMalloc((ec+1)*sizeof(int));CHKPTRQ(garray); 83 tmp = (int*)PetscMalloc((ec*bs+1)*sizeof(int));CHKPTRQ(tmp) 84 ec = 0; 85 for (i=0; i<Nbs; i++) { 86 if (indices[i]) { 87 garray[ec++] = i; 88 } 89 } 90 91 /* make indices now point into garray */ 92 for (i=0; i<ec; i++) { 93 indices[garray[i]] = i; 94 } 95 96 /* compact out the extra columns in B */ 97 for (i=0; i<B->mbs; i++) { 98 for (j=0; j<B->ilen[i]; j++) { 99 aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 100 } 101 } 102 B->nbs = ec; 103 B->n = ec*B->bs; 104 ierr = PetscFree(indices);CHKERRQ(ierr); 105 #endif 106 107 for (i=0,col=0; i<ec; i++) { 108 for (j=0; j<bs; j++,col++) tmp[col] = garray[i]*bs+j; 109 } 110 /* create local vector that is used to scatter into */ 111 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr); 112 113 /* create two temporary index sets for building scatter-gather */ 114 115 /* ierr = ISCreateGeneral(PETSC_COMM_SELF,ec*bs,tmp,&from);CHKERRQ(ierr); */ 116 for (i=0,col=0; i<ec; i++) { 117 garray[i] = bs*garray[i]; 118 } 119 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr); 120 for (i=0,col=0; i<ec; i++) { 121 garray[i] = garray[i]/bs; 122 } 123 124 stmp = (int*)PetscMalloc((ec+1)*sizeof(int));CHKPTRQ(stmp); 125 for (i=0; i<ec; i++) { stmp[i] = bs*i; } 126 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr); 127 ierr = PetscFree(stmp);CHKERRQ(ierr); 128 129 /* create temporary global vector to generate scatter context */ 130 /* this is inefficient, but otherwise we must do either 131 1) save garray until the first actual scatter when the vector is known or 132 2) have another way of generating a scatter context without a vector.*/ 133 ierr = VecCreateMPI(mat->comm,baij->n,baij->N,&gvec);CHKERRQ(ierr); 134 135 /* gnerate the scatter context */ 136 ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr); 137 138 /* 139 Post the receives for the first matrix vector product. We sync-chronize after 140 this on the chance that the user immediately calls MatMult() after assemblying 141 the matrix. 142 */ 143 ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 144 ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr); 145 146 PLogObjectParent(mat,baij->Mvctx); 147 PLogObjectParent(mat,baij->lvec); 148 PLogObjectParent(mat,from); 149 PLogObjectParent(mat,to); 150 baij->garray = garray; 151 PLogObjectMemory(mat,(ec+1)*sizeof(int)); 152 ierr = ISDestroy(from);CHKERRQ(ierr); 153 ierr = ISDestroy(to);CHKERRQ(ierr); 154 ierr = VecDestroy(gvec);CHKERRQ(ierr); 155 ierr = PetscFree(tmp);CHKERRQ(ierr); 156 PetscFunctionReturn(0); 157 } 158 159 160 /* 161 Takes the local part of an already assembled MPIBAIJ matrix 162 and disassembles it. This is to allow new nonzeros into the matrix 163 that require more communication in the matrix vector multiply. 164 Thus certain data-structures must be rebuilt. 165 166 Kind of slow! But that's what application programmers get when 167 they are sloppy. 168 */ 169 #undef __FUNC__ 170 #define __FUNC__ /*<a name=""></a>*/"DisAssemble_MPISBAIJ" 171 int DisAssemble_MPISBAIJ(Mat A) 172 { 173 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 174 Mat B = baij->B,Bnew; 175 Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 176 int ierr,i,j,mbs=Bbaij->mbs,n = baij->N,col,*garray=baij->garray; 177 int k,bs=baij->bs,bs2=baij->bs2,*rvals,*nz,ec,m=Bbaij->m; 178 MatScalar *a = Bbaij->a; 179 #if defined(PETSC_USE_MAT_SINGLE) 180 Scalar *atmp = (Scalar*)PetscMalloc(baij->bs*sizeof(Scalar)); 181 int l; 182 #else 183 Scalar *atmp; 184 #endif 185 186 PetscFunctionBegin; 187 /* free stuff related to matrix-vec multiply */ 188 ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PLogObjectMemory below */ 189 ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0; 190 ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0; 191 if (baij->colmap) { 192 #if defined (PETSC_USE_CTABLE) 193 ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr); 194 #else 195 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 196 baij->colmap = 0; 197 PLogObjectMemory(A,-Bbaij->nbs*sizeof(int)); 198 #endif 199 } 200 201 /* make sure that B is assembled so we can access its values */ 202 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 203 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 204 205 /* invent new B and copy stuff over */ 206 nz = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(nz); 207 for (i=0; i<mbs; i++) { 208 nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 209 } 210 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,baij->bs,m,n,0,nz,&Bnew);CHKERRQ(ierr); 211 ierr = PetscFree(nz);CHKERRQ(ierr); 212 213 rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals); 214 for (i=0; i<mbs; i++) { 215 rvals[0] = bs*i; 216 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 217 for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) { 218 col = garray[Bbaij->j[j]]*bs; 219 for (k=0; k<bs; k++) { 220 #if defined(PETSC_USE_MAT_SINGLE) 221 for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l]; 222 #else 223 atmp = a+j*bs2; 224 #endif 225 ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr); 226 col++; 227 } 228 } 229 } 230 #if defined(PETSC_USE_MAT_SINGLE) 231 ierr = PetscFree(atmp);CHKERRQ(ierr); 232 #endif 233 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 234 baij->garray = 0; 235 ierr = PetscFree(rvals);CHKERRQ(ierr); 236 PLogObjectMemory(A,-ec*sizeof(int)); 237 ierr = MatDestroy(B);CHKERRQ(ierr); 238 PLogObjectParent(A,Bnew); 239 baij->B = Bnew; 240 A->was_assembled = PETSC_FALSE; 241 PetscFunctionReturn(0); 242 } 243 244 245 246