/*$Id: mmbaij.c,v 1.46 2001/09/25 00:31:36 balay Exp $*/

/*
   Support for the parallel BAIJ matrix vector multiply
*/
#include "src/mat/impls/baij/mpi/mpibaij.h"

EXTERN int MatSetValuesBlocked_SeqBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);

#undef __FUNCT__
#define __FUNCT__ "MatSetUpMultiply_MPIBAIJ"
int MatSetUpMultiply_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)(baij->B->data);
  int         Nbs = baij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray;
  int         bs = baij->bs,*stmp;
  IS          from,to;
  Vec         gvec;
#if defined (PETSC_USE_CTABLE)
  PetscTable         gid1_lid1;
  PetscTablePosition tpos;
  int                gid,lid;
#endif

  PetscFunctionBegin;

#if defined (PETSC_USE_CTABLE)
  /* use a table - Mark Adams */
  ierr = PetscTableCreate(B->mbs,&gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      int data,gid1 = aj[B->i[i]+j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
      if (!data) {
        /* one based table */
        ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
      }
    }
  }
  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
  while (tpos) {
    ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
    gid--; lid--;
    garray[lid] = gid;
  }
  ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
  ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
  }
  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      int gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
      lid--;
      aj[B->i[i]+j] = lid;
    }
  }
  B->nbs     = ec;
  baij->B->n = ec*B->bs;
  ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr);
  /* Mark Adams */
#else
  /* Make an array as long as the number of columns */
  /* mark those columns that are in baij->B */
  ierr = PetscMalloc((Nbs+1)*sizeof(int),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr);
  ec   = 0;
  for (i=0; i<Nbs; i++) {
    if (indices[i]) {
      garray[ec++] = i;
    }
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) {
    indices[garray[i]] = i;
  }

  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
    }
  }
  B->nbs     = ec;
  baij->B->n = ec*B->bs;
  ierr = PetscFree(indices);CHKERRQ(ierr);
#endif

  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building scatter-gather */
  for (i=0; i<ec; i++) {
    garray[i] = bs*garray[i];
  }
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    garray[i] = garray[i]/bs;
  }

  ierr = PetscMalloc((ec+1)*sizeof(int),&stmp);CHKERRQ(ierr);
  for (i=0; i<ec; i++) { stmp[i] = bs*i; }
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
  ierr = PetscFree(stmp);CHKERRQ(ierr);

  /* create temporary global vector to generate scatter context */
  /* this is inefficient, but otherwise we must do either
     1) save garray until the first actual scatter when the vector is known or
     2) have another way of generating a scatter context without a vector.*/
  ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr);

  /* generate the scatter context */
  ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);

  /*
     Post the receives for the first matrix vector product. We synchronize after
     this on the chance that the user immediately calls MatMult() after assembling
     the matrix.
  */
  ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
  ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);

  PetscLogObjectParent(mat,baij->Mvctx);
  PetscLogObjectParent(mat,baij->lvec);
  PetscLogObjectParent(mat,from);
  PetscLogObjectParent(mat,to);
  baij->garray = garray;
  PetscLogObjectMemory(mat,(ec+1)*sizeof(int));
  ierr = ISDestroy(from);CHKERRQ(ierr);
  ierr = ISDestroy(to);CHKERRQ(ierr);
  ierr = VecDestroy(gvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
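
/*
   Hedged sketch (not copied from this file): the objects built above are what the
   parallel matrix-vector product consumes.  The ghost values of the input vector
   are gathered into baij->lvec through baij->Mvctx, and the product is formed as

        y = A_d * x_local + B_o * lvec

   where A_d is the "diagonal" block baij->A and B_o the compressed "off-diagonal"
   block baij->B, whose local block column lid corresponds to global block column
   garray[lid].  Roughly, in illustrative pseudocode (xx and yy are hypothetical
   global input/output vectors; the real routine applies the sequential blocks to
   the local parts of xx and yy):

      ierr = VecScatterBegin(xx,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
      ierr = MatMult(baij->A,xx_local,yy_local);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
      ierr = MatMultAdd(baij->B,baij->lvec,yy_local,yy_local);CHKERRQ(ierr);
*/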
/*
     Takes the local part of an already assembled MPIBAIJ matrix
   and disassembles it. This is to allow new nonzeros into the matrix
   that require more communication in the matrix vector multiply.
   Thus certain data-structures must be rebuilt.

   Kind of slow! But that's what application programmers get when
   they are sloppy.
*/
#undef __FUNCT__
#define __FUNCT__ "DisAssemble_MPIBAIJ"
int DisAssemble_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *baij  = (Mat_MPIBAIJ*)A->data;
  Mat         B      = baij->B,Bnew;
  Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data;
  int         ierr,i,j,mbs=Bbaij->mbs,n = A->N,col,*garray=baij->garray;
  int         bs2 = baij->bs2,*nz,ec,m = A->m;
  MatScalar   *a = Bbaij->a;
  PetscScalar *atmp;
#if defined(PETSC_USE_MAT_SINGLE)
  int         k;
#endif

  PetscFunctionBegin;
  /* free stuff related to matrix-vec multiply */
  ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
  ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0;
  ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0;
  if (baij->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr); baij->colmap = 0;
#else
    ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
    baij->colmap = 0;
    PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(int));
#endif
  }

  /* make sure that B is assembled so we can access its values */
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* invent new B and copy stuff over */
  ierr = PetscMalloc(mbs*sizeof(int),&nz);CHKERRQ(ierr);
  for (i=0; i<mbs; i++) {
    nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
  }
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,baij->bs,m,n,0,nz,&Bnew);CHKERRQ(ierr);
  ierr = MatSetOption(Bnew,MAT_COLUMN_ORIENTED);CHKERRQ(ierr);

#if defined(PETSC_USE_MAT_SINGLE)
  ierr = PetscMalloc(bs2*sizeof(PetscScalar),&atmp);CHKERRQ(ierr);
#endif
  for (i=0; i<mbs; i++) {
    for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
      col = garray[Bbaij->j[j]];
#if defined(PETSC_USE_MAT_SINGLE)
      for (k=0; k<bs2; k++) atmp[k] = a[j*bs2+k];
#else
      atmp = a + j*bs2;
#endif
      ierr = MatSetValuesBlocked_SeqBAIJ(Bnew,1,&i,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
    }
  }
  ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED);CHKERRQ(ierr);

#if defined(PETSC_USE_MAT_SINGLE)
  ierr = PetscFree(atmp);CHKERRQ(ierr);
#endif

  ierr = PetscFree(nz);CHKERRQ(ierr);
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  baij->garray = 0;
  PetscLogObjectMemory(A,-ec*sizeof(int));
  ierr = MatDestroy(B);CHKERRQ(ierr);
  PetscLogObjectParent(A,Bnew);
  baij->B = Bnew;
  A->was_assembled = PETSC_FALSE;
  PetscFunctionReturn(0);
}
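
/*
   Note (hedged, not taken from this routine): DisAssemble_MPIBAIJ() is meant for the
   situation where, after MAT_FINAL_ASSEMBLY, a later MatSetValues() introduces a
   coupling to a column not yet represented in the compressed off-diagonal block,
   for example (the names row, newcol and v are hypothetical):

      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatSetValues(A,1,&row,1,&newcol,&v,INSERT_VALUES);CHKERRQ(ierr);

   The routine above expands B back to global column indices; the next assembly then
   calls MatSetUpMultiply_MPIBAIJ() to rebuild garray, lvec and the scatter context.
*/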
/* ugly stuff added for Glenn; someday we should fix this up */

static int *uglyrmapd = 0,*uglyrmapo = 0;  /* mapping from the local ordering to the "diagonal" and "off-diagonal"
                                              parts of the local matrix */
static Vec uglydd = 0,uglyoo = 0;          /* work vectors used to scale the two parts of the local matrix */


#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJDiagonalScaleLocalSetUp"
int MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale)
{
  Mat_MPIBAIJ *ina = (Mat_MPIBAIJ*) inA->data; /* access private part of matrix */
  Mat_SeqBAIJ *A   = (Mat_SeqBAIJ*)ina->A->data;
  Mat_SeqBAIJ *B   = (Mat_SeqBAIJ*)ina->B->data;
  int         ierr,bs = A->bs,i,n,nt,j,cstart,cend,no,*garray = ina->garray,*lindices;
  int         *r_rmapd,*r_rmapo;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr);
  ierr = MatGetSize(ina->A,PETSC_NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc((inA->bmapping->n+1)*sizeof(int),&r_rmapd);CHKERRQ(ierr);
  ierr = PetscMemzero(r_rmapd,inA->bmapping->n*sizeof(int));CHKERRQ(ierr);
  nt   = 0;
  for (i=0; i<inA->bmapping->n; i++) {
    if (inA->bmapping->indices[i]*bs >= cstart && inA->bmapping->indices[i]*bs < cend) {
      nt++;
      r_rmapd[i] = inA->bmapping->indices[i] + 1;
    }
  }
  if (nt*bs != n) SETERRQ2(1,"Hmm nt*bs %d n %d",nt*bs,n);
  ierr = PetscMalloc((n+1)*sizeof(int),&uglyrmapd);CHKERRQ(ierr);
  for (i=0; i<inA->bmapping->n; i++) {
    if (r_rmapd[i]) {
      for (j=0; j<bs; j++) {
        uglyrmapd[(r_rmapd[i]-1)*bs+j-cstart] = i*bs + j;
      }
    }
  }
  ierr = PetscFree(r_rmapd);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF,n,&uglydd);CHKERRQ(ierr);

  ierr = PetscMalloc((ina->Nbs+1)*sizeof(int),&lindices);CHKERRQ(ierr);
  ierr = PetscMemzero(lindices,ina->Nbs*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<B->nbs; i++) {
    lindices[garray[i]] = i+1;
  }
  no   = inA->bmapping->n - nt;
  ierr = PetscMalloc((inA->bmapping->n+1)*sizeof(int),&r_rmapo);CHKERRQ(ierr);
  ierr = PetscMemzero(r_rmapo,inA->bmapping->n*sizeof(int));CHKERRQ(ierr);
  nt   = 0;
  for (i=0; i<inA->bmapping->n; i++) {
    if (lindices[inA->bmapping->indices[i]]) {
      nt++;
      r_rmapo[i] = lindices[inA->bmapping->indices[i]];
    }
  }
  if (nt > no) SETERRQ2(1,"Hmm nt %d no %d",nt,no);
  ierr = PetscFree(lindices);CHKERRQ(ierr);
  ierr = PetscMalloc((nt*bs+1)*sizeof(int),&uglyrmapo);CHKERRQ(ierr);
  for (i=0; i<inA->bmapping->n; i++) {
    if (r_rmapo[i]) {
      for (j=0; j<bs; j++) {
        uglyrmapo[(r_rmapo[i]-1)*bs+j] = i*bs + j;
      }
    }
  }
  ierr = PetscFree(r_rmapo);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF,nt*bs,&uglyoo);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJDiagonalScaleLocal"
int MatMPIBAIJDiagonalScaleLocal(Mat A,Vec scale)
{
  /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */
  int ierr,(*f)(Mat,Vec);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)A,"MatDiagonalScaleLocal_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(A,scale);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScaleLocal_MPIBAIJ"
int MatDiagonalScaleLocal_MPIBAIJ(Mat A,Vec scale)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; /* access private part of matrix */
  int         ierr,n,i;
  PetscScalar *d,*o,*s;

  PetscFunctionBegin;
  if (!uglyrmapd) {
    ierr = MatMPIBAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr);
  }

  ierr = VecGetArray(scale,&s);CHKERRQ(ierr);

  ierr = VecGetLocalSize(uglydd,&n);CHKERRQ(ierr);
  ierr = VecGetArray(uglydd,&d);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */
  }
  ierr = VecRestoreArray(uglydd,&d);CHKERRQ(ierr);
  /* column scale "diagonal" portion of local matrix */
  ierr = MatDiagonalScale(a->A,PETSC_NULL,uglydd);CHKERRQ(ierr);

  ierr = VecGetLocalSize(uglyoo,&n);CHKERRQ(ierr);
  ierr = VecGetArray(uglyoo,&o);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
  }
  ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr);
  ierr = VecRestoreArray(uglyoo,&o);CHKERRQ(ierr);
  /* column scale "off-diagonal" portion of local matrix */
  ierr = MatDiagonalScale(a->B,PETSC_NULL,uglyoo);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END
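
/*
   Minimal usage sketch for the local diagonal scaling above (illustrative only;
   it assumes a block local-to-global mapping has already been attached to the
   matrix, and the names A and lscale are hypothetical):

      Vec lscale;     sequential vector in the local (ghosted) point ordering,
                      one entry per mapped point, i.e. of length bmapping->n*bs

      ierr = MatMPIBAIJDiagonalScaleLocal(A,lscale);CHKERRQ(ierr);

   The first call builds uglyrmapd/uglyrmapo and the uglydd/uglyoo work vectors via
   MatMPIBAIJDiagonalScaleLocalSetUp(); subsequent calls reuse them to column scale
   the "diagonal" and "off-diagonal" parts of the local matrix.
*/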