1 /*$Id: mmbaij.c,v 1.46 2001/09/25 00:31:36 balay Exp $*/ 2 3 /* 4 Support for the parallel BAIJ matrix vector multiply 5 */ 6 #include "src/mat/impls/baij/mpi/mpibaij.h" 7 #include "src/vec/vecimpl.h" 8 9 EXTERN int MatSetValuesBlocked_SeqBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode); 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatSetUpMultiply_MPIBAIJ" 13 int MatSetUpMultiply_MPIBAIJ(Mat mat) 14 { 15 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 16 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(baij->B->data); 17 int Nbs = baij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray; 18 int bs = baij->bs,*stmp; 19 IS from,to; 20 Vec gvec; 21 #if defined (PETSC_USE_CTABLE) 22 PetscTable gid1_lid1; 23 PetscTablePosition tpos; 24 int gid,lid; 25 #endif 26 27 PetscFunctionBegin; 28 29 #if defined (PETSC_USE_CTABLE) 30 /* use a table - Mark Adams */ 31 ierr = PetscTableCreate(B->mbs,&gid1_lid1);CHKERRQ(ierr); 32 for (i=0; i<B->mbs; i++) { 33 for (j=0; j<B->ilen[i]; j++) { 34 int data,gid1 = aj[B->i[i]+j] + 1; 35 ierr = PetscTableFind(gid1_lid1,gid1,&data) ;CHKERRQ(ierr); 36 if (!data) { 37 /* one based table */ 38 ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr); 39 } 40 } 41 } 42 /* form array of columns we need */ 43 ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr); 44 ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 45 while (tpos) { 46 ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 47 gid--; lid--; 48 garray[lid] = gid; 49 } 50 ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); 51 ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 52 for (i=0; i<ec; i++) { 53 ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr); 54 } 55 /* compact out the extra columns in B */ 56 for (i=0; i<B->mbs; i++) { 57 for (j=0; j<B->ilen[i]; j++) { 58 int gid1 = aj[B->i[i] + j] + 1; 59 ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 60 lid --; 61 aj[B->i[i]+j] = lid; 62 } 63 } 64 B->nbs = ec; 65 baij->B->n = ec*B->bs; 66 ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr); 67 /* Mark Adams */ 68 #else 69 /* Make an array as long as the number of columns */ 70 /* mark those columns that are in baij->B */ 71 ierr = PetscMalloc((Nbs+1)*sizeof(int),&indices);CHKERRQ(ierr); 72 ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr); 73 for (i=0; i<B->mbs; i++) { 74 for (j=0; j<B->ilen[i]; j++) { 75 if (!indices[aj[B->i[i] + j]]) ec++; 76 indices[aj[B->i[i] + j]] = 1; 77 } 78 } 79 80 /* form array of columns we need */ 81 ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr); 82 ec = 0; 83 for (i=0; i<Nbs; i++) { 84 if (indices[i]) { 85 garray[ec++] = i; 86 } 87 } 88 89 /* make indices now point into garray */ 90 for (i=0; i<ec; i++) { 91 indices[garray[i]] = i; 92 } 93 94 /* compact out the extra columns in B */ 95 for (i=0; i<B->mbs; i++) { 96 for (j=0; j<B->ilen[i]; j++) { 97 aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 98 } 99 } 100 B->nbs = ec; 101 baij->B->n = ec*B->bs; 102 ierr = PetscFree(indices);CHKERRQ(ierr); 103 #endif 104 105 /* create local vector that is used to scatter into */ 106 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr); 107 108 /* create two temporary index sets for building scatter-gather */ 109 for (i=0; i<ec; i++) { 110 garray[i] = bs*garray[i]; 111 } 112 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr); 113 for (i=0; i<ec; i++) { 114 garray[i] = garray[i]/bs; 115 } 116 117 ierr = PetscMalloc((ec+1)*sizeof(int),&stmp);CHKERRQ(ierr); 118 for (i=0; i<ec; i++) { stmp[i] = bs*i; } 119 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr); 120 ierr = PetscFree(stmp);CHKERRQ(ierr); 121 122 /* create temporary global vector to generate scatter context */ 123 /* this is inefficient, but otherwise we must do either 124 1) save garray until the first actual scatter when the vector is known or 125 2) have another way of generating a scatter context without a vector.*/ 126 ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr); 127 128 /* gnerate the scatter context */ 129 ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr); 130 131 /* 132 Post the receives for the first matrix vector product. We sync-chronize after 133 this on the chance that the user immediately calls MatMult() after assemblying 134 the matrix. 135 */ 136 ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 137 ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr); 138 139 PetscLogObjectParent(mat,baij->Mvctx); 140 PetscLogObjectParent(mat,baij->lvec); 141 PetscLogObjectParent(mat,from); 142 PetscLogObjectParent(mat,to); 143 baij->garray = garray; 144 PetscLogObjectMemory(mat,(ec+1)*sizeof(int)); 145 ierr = ISDestroy(from);CHKERRQ(ierr); 146 ierr = ISDestroy(to);CHKERRQ(ierr); 147 ierr = VecDestroy(gvec);CHKERRQ(ierr); 148 PetscFunctionReturn(0); 149 } 150 151 /* 152 Takes the local part of an already assembled MPIBAIJ matrix 153 and disassembles it. This is to allow new nonzeros into the matrix 154 that require more communication in the matrix vector multiply. 155 Thus certain data-structures must be rebuilt. 156 157 Kind of slow! But that's what application programmers get when 158 they are sloppy. 159 */ 160 #undef __FUNCT__ 161 #define __FUNCT__ "DisAssemble_MPIBAIJ" 162 int DisAssemble_MPIBAIJ(Mat A) 163 { 164 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 165 Mat B = baij->B,Bnew; 166 Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 167 int ierr,i,j,mbs=Bbaij->mbs,n = A->N,col,*garray=baij->garray; 168 int bs2 = baij->bs2,*nz,ec,m = A->m; 169 MatScalar *a = Bbaij->a; 170 PetscScalar *atmp; 171 #if defined(PETSC_USE_MAT_SINGLE) 172 int k; 173 #endif 174 175 PetscFunctionBegin; 176 /* free stuff related to matrix-vec multiply */ 177 ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 178 ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0; 179 ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0; 180 if (baij->colmap) { 181 #if defined (PETSC_USE_CTABLE) 182 ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr); 183 #else 184 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 185 baij->colmap = 0; 186 PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(int)); 187 #endif 188 } 189 190 /* make sure that B is assembled so we can access its values */ 191 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 192 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 193 194 /* invent new B and copy stuff over */ 195 ierr = PetscMalloc(mbs*sizeof(int),&nz);CHKERRQ(ierr); 196 for (i=0; i<mbs; i++) { 197 nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 198 } 199 ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,baij->bs,m,n,0,nz,&Bnew);CHKERRQ(ierr); 200 ierr = MatSetOption(Bnew,MAT_COLUMN_ORIENTED);CHKERRQ(ierr); 201 202 #if defined(PETSC_USE_MAT_SINGLE) 203 ierr = PetscMalloc(bs2*sizeof(PetscScalar),&atmp);CHKERRQ(ierr); 204 #endif 205 for (i=0; i<mbs; i++) { 206 for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) { 207 col = garray[Bbaij->j[j]]; 208 #if defined(PETSC_USE_MAT_SINGLE) 209 for (k=0; k<bs2; k++) atmp[k] = a[j*bs2+k]; 210 #else 211 atmp = a + j*bs2; 212 #endif 213 ierr = MatSetValuesBlocked_SeqBAIJ(Bnew,1,&i,1,&col,atmp,B->insertmode);CHKERRQ(ierr); 214 } 215 } 216 ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED);CHKERRQ(ierr); 217 218 #if defined(PETSC_USE_MAT_SINGLE) 219 ierr = PetscFree(atmp);CHKERRQ(ierr); 220 #endif 221 222 ierr = PetscFree(nz);CHKERRQ(ierr); 223 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 224 baij->garray = 0; 225 PetscLogObjectMemory(A,-ec*sizeof(int)); 226 ierr = MatDestroy(B);CHKERRQ(ierr); 227 PetscLogObjectParent(A,Bnew); 228 baij->B = Bnew; 229 A->was_assembled = PETSC_FALSE; 230 PetscFunctionReturn(0); 231 } 232 233 /* ugly stuff added for Glenn someday we should fix this up */ 234 235 static int *uglyrmapd = 0,*uglyrmapo = 0; /* mapping from the local ordering to the "diagonal" and "off-diagonal" 236 parts of the local matrix */ 237 static Vec uglydd = 0,uglyoo = 0; /* work vectors used to scale the two parts of the local matrix */ 238 239 240 #undef __FUNCT__ 241 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocalSetUp" 242 int MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale) 243 { 244 Mat_MPIBAIJ *ina = (Mat_MPIBAIJ*) inA->data; /*access private part of matrix */ 245 Mat_SeqBAIJ *A = (Mat_SeqBAIJ*)ina->A->data; 246 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)ina->B->data; 247 int ierr,bs = A->bs,i,n,nt,j,cstart,cend,no,*garray = ina->garray,*lindices; 248 int *r_rmapd,*r_rmapo; 249 250 PetscFunctionBegin; 251 ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr); 252 ierr = MatGetSize(ina->A,PETSC_NULL,&n);CHKERRQ(ierr); 253 ierr = PetscMalloc((inA->bmapping->n+1)*sizeof(int),&r_rmapd);CHKERRQ(ierr); 254 ierr = PetscMemzero(r_rmapd,inA->bmapping->n*sizeof(int));CHKERRQ(ierr); 255 nt = 0; 256 for (i=0; i<inA->bmapping->n; i++) { 257 if (inA->bmapping->indices[i]*bs >= cstart && inA->bmapping->indices[i]*bs < cend) { 258 nt++; 259 r_rmapd[i] = inA->bmapping->indices[i] + 1; 260 } 261 } 262 if (nt*bs != n) SETERRQ2(1,"Hmm nt*bs %d n %d",nt*bs,n); 263 ierr = PetscMalloc((n+1)*sizeof(int),&uglyrmapd);CHKERRQ(ierr); 264 for (i=0; i<inA->bmapping->n; i++) { 265 if (r_rmapd[i]){ 266 for (j=0; j<bs; j++) { 267 uglyrmapd[(r_rmapd[i]-1)*bs+j-cstart] = i*bs + j; 268 } 269 } 270 } 271 ierr = PetscFree(r_rmapd);CHKERRQ(ierr); 272 ierr = VecCreateSeq(PETSC_COMM_SELF,n,&uglydd);CHKERRQ(ierr); 273 274 ierr = PetscMalloc((ina->Nbs+1)*sizeof(int),&lindices);CHKERRQ(ierr); 275 ierr = PetscMemzero(lindices,ina->Nbs*sizeof(int));CHKERRQ(ierr); 276 for (i=0; i<B->nbs; i++) { 277 lindices[garray[i]] = i+1; 278 } 279 no = inA->bmapping->n - nt; 280 ierr = PetscMalloc((inA->bmapping->n+1)*sizeof(int),&r_rmapo);CHKERRQ(ierr); 281 ierr = PetscMemzero(r_rmapo,inA->bmapping->n*sizeof(int));CHKERRQ(ierr); 282 nt = 0; 283 for (i=0; i<inA->bmapping->n; i++) { 284 if (lindices[inA->bmapping->indices[i]]) { 285 nt++; 286 r_rmapo[i] = lindices[inA->bmapping->indices[i]]; 287 } 288 } 289 if (nt > no) SETERRQ2(1,"Hmm nt %d no %d",nt,n); 290 ierr = PetscFree(lindices);CHKERRQ(ierr); 291 ierr = PetscMalloc((nt*bs+1)*sizeof(int),&uglyrmapo);CHKERRQ(ierr); 292 for (i=0; i<inA->bmapping->n; i++) { 293 if (r_rmapo[i]){ 294 for (j=0; j<bs; j++) { 295 uglyrmapo[(r_rmapo[i]-1)*bs+j] = i*bs + j; 296 } 297 } 298 } 299 ierr = PetscFree(r_rmapo);CHKERRQ(ierr); 300 ierr = VecCreateSeq(PETSC_COMM_SELF,nt*bs,&uglyoo);CHKERRQ(ierr); 301 302 PetscFunctionReturn(0); 303 } 304 305 #undef __FUNCT__ 306 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocal" 307 int MatMPIBAIJDiagonalScaleLocal(Mat A,Vec scale) 308 { 309 /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */ 310 int ierr,(*f)(Mat,Vec); 311 312 PetscFunctionBegin; 313 ierr = PetscObjectQueryFunction((PetscObject)A,"MatDiagonalScaleLocal_C",(void (**)(void))&f);CHKERRQ(ierr); 314 if (f) { 315 ierr = (*f)(A,scale);CHKERRQ(ierr); 316 } 317 PetscFunctionReturn(0); 318 } 319 320 EXTERN_C_BEGIN 321 #undef __FUNCT__ 322 #define __FUNCT__ "MatDiagonalScaleLocal_MPIBAIJ" 323 int MatDiagonalScaleLocal_MPIBAIJ(Mat A,Vec scale) 324 { 325 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; /*access private part of matrix */ 326 int ierr,n,i; 327 PetscScalar *d,*o,*s; 328 329 PetscFunctionBegin; 330 if (!uglyrmapd) { 331 ierr = MatMPIBAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr); 332 } 333 334 ierr = VecGetArray(scale,&s);CHKERRQ(ierr); 335 336 ierr = VecGetLocalSize(uglydd,&n);CHKERRQ(ierr); 337 ierr = VecGetArray(uglydd,&d);CHKERRQ(ierr); 338 for (i=0; i<n; i++) { 339 d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ 340 } 341 ierr = VecRestoreArray(uglydd,&d);CHKERRQ(ierr); 342 /* column scale "diagonal" portion of local matrix */ 343 ierr = MatDiagonalScale(a->A,PETSC_NULL,uglydd);CHKERRQ(ierr); 344 345 ierr = VecGetLocalSize(uglyoo,&n);CHKERRQ(ierr); 346 ierr = VecGetArray(uglyoo,&o);CHKERRQ(ierr); 347 for (i=0; i<n; i++) { 348 o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ 349 } 350 ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr); 351 ierr = VecRestoreArray(uglyoo,&o);CHKERRQ(ierr); 352 /* column scale "off-diagonal" portion of local matrix */ 353 ierr = MatDiagonalScale(a->B,PETSC_NULL,uglyoo);CHKERRQ(ierr); 354 355 PetscFunctionReturn(0); 356 } 357 EXTERN_C_END 358 359 360