1 #define PETSCMAT_DLL 2 3 /* 4 Support for the parallel BAIJ matrix vector multiply 5 */ 6 #include "src/mat/impls/baij/mpi/mpibaij.h" 7 8 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 9 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatSetUpMultiply_MPIBAIJ" 12 PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat) 13 { 14 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 15 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(baij->B->data); 16 PetscErrorCode ierr; 17 PetscInt i,j,*aj = B->j,ec = 0,*garray; 18 PetscInt bs = mat->rmap.bs,*stmp; 19 IS from,to; 20 Vec gvec; 21 #if defined (PETSC_USE_CTABLE) 22 PetscTable gid1_lid1; 23 PetscTablePosition tpos; 24 PetscInt gid,lid; 25 #else 26 PetscInt Nbs = baij->Nbs,*indices; 27 #endif 28 29 PetscFunctionBegin; 30 31 #if defined (PETSC_USE_CTABLE) 32 /* use a table - Mark Adams */ 33 ierr = PetscTableCreate(B->mbs,&gid1_lid1);CHKERRQ(ierr); 34 for (i=0; i<B->mbs; i++) { 35 for (j=0; j<B->ilen[i]; j++) { 36 PetscInt data,gid1 = aj[B->i[i]+j] + 1; 37 ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr); 38 if (!data) { 39 /* one based table */ 40 ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr); 41 } 42 } 43 } 44 /* form array of columns we need */ 45 ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 46 ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 47 while (tpos) { 48 ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 49 gid--; lid--; 50 garray[lid] = gid; 51 } 52 ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); 53 ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 54 for (i=0; i<ec; i++) { 55 ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr); 56 } 57 /* compact out the extra columns in B */ 58 for (i=0; i<B->mbs; i++) { 59 for (j=0; j<B->ilen[i]; j++) { 60 PetscInt gid1 = aj[B->i[i] + j] + 1; 61 ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 62 lid --; 63 aj[B->i[i]+j] = lid; 64 } 65 } 66 B->nbs = ec; 67 baij->B->cmap.n = baij->B->cmap.N = ec*mat->rmap.bs; 68 ierr = PetscMapInitialize(baij->B->comm,&(baij->B->cmap));CHKERRQ(ierr); 69 ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr); 70 /* Mark Adams */ 71 #else 72 /* Make an array as long as the number of columns */ 73 /* mark those columns that are in baij->B */ 74 ierr = PetscMalloc((Nbs+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr); 75 ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr); 76 for (i=0; i<B->mbs; i++) { 77 for (j=0; j<B->ilen[i]; j++) { 78 if (!indices[aj[B->i[i] + j]]) ec++; 79 indices[aj[B->i[i] + j]] = 1; 80 } 81 } 82 83 /* form array of columns we need */ 84 ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 85 ec = 0; 86 for (i=0; i<Nbs; i++) { 87 if (indices[i]) { 88 garray[ec++] = i; 89 } 90 } 91 92 /* make indices now point into garray */ 93 for (i=0; i<ec; i++) { 94 indices[garray[i]] = i; 95 } 96 97 /* compact out the extra columns in B */ 98 for (i=0; i<B->mbs; i++) { 99 for (j=0; j<B->ilen[i]; j++) { 100 aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 101 } 102 } 103 B->nbs = ec; 104 baij->B->cmap.n =baij->B->cmap.N = ec*mat->rmap.bs; 105 ierr = PetscMapInitialize(baij->B->comm,&(baij->B->cmap));CHKERRQ(ierr); 106 ierr = PetscFree(indices);CHKERRQ(ierr); 107 #endif 108 109 /* create local vector that is used to scatter into */ 110 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr); 111 112 /* create two temporary index sets for building scatter-gather */ 113 for (i=0; i<ec; i++) { 114 garray[i] = bs*garray[i]; 115 } 116 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr); 117 for (i=0; i<ec; i++) { 118 garray[i] = garray[i]/bs; 119 } 120 121 ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&stmp);CHKERRQ(ierr); 122 for (i=0; i<ec; i++) { stmp[i] = bs*i; } 123 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr); 124 ierr = PetscFree(stmp);CHKERRQ(ierr); 125 126 /* create temporary global vector to generate scatter context */ 127 /* this is inefficient, but otherwise we must do either 128 1) save garray until the first actual scatter when the vector is known or 129 2) have another way of generating a scatter context without a vector.*/ 130 ierr = VecCreateMPI(mat->comm,mat->cmap.n,mat->cmap.N,&gvec);CHKERRQ(ierr); 131 132 /* gnerate the scatter context */ 133 ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr); 134 135 /* 136 Post the receives for the first matrix vector product. We sync-chronize after 137 this on the chance that the user immediately calls MatMult() after assemblying 138 the matrix. 139 */ 140 ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 141 ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr); 142 143 ierr = PetscLogObjectParent(mat,baij->Mvctx);CHKERRQ(ierr); 144 ierr = PetscLogObjectParent(mat,baij->lvec);CHKERRQ(ierr); 145 ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr); 146 ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr); 147 baij->garray = garray; 148 ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr); 149 ierr = ISDestroy(from);CHKERRQ(ierr); 150 ierr = ISDestroy(to);CHKERRQ(ierr); 151 ierr = VecDestroy(gvec);CHKERRQ(ierr); 152 PetscFunctionReturn(0); 153 } 154 155 /* 156 Takes the local part of an already assembled MPIBAIJ matrix 157 and disassembles it. This is to allow new nonzeros into the matrix 158 that require more communication in the matrix vector multiply. 159 Thus certain data-structures must be rebuilt. 160 161 Kind of slow! But that's what application programmers get when 162 they are sloppy. 163 */ 164 #undef __FUNCT__ 165 #define __FUNCT__ "DisAssemble_MPIBAIJ" 166 PetscErrorCode DisAssemble_MPIBAIJ(Mat A) 167 { 168 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 169 Mat B = baij->B,Bnew; 170 Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 171 PetscErrorCode ierr; 172 PetscInt i,j,mbs=Bbaij->mbs,n = A->cmap.n,col,*garray=baij->garray; 173 PetscInt bs2 = baij->bs2,*nz,ec,m = A->rmap.n; 174 MatScalar *a = Bbaij->a; 175 PetscScalar *atmp; 176 #if defined(PETSC_USE_MAT_SINGLE) 177 PetscInt k; 178 #endif 179 180 PetscFunctionBegin; 181 /* free stuff related to matrix-vec multiply */ 182 ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 183 ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0; 184 ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0; 185 if (baij->colmap) { 186 #if defined (PETSC_USE_CTABLE) 187 ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr); 188 #else 189 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 190 baij->colmap = 0; 191 ierr = PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr); 192 #endif 193 } 194 195 /* make sure that B is assembled so we can access its values */ 196 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 197 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 198 199 /* invent new B and copy stuff over */ 200 ierr = PetscMalloc(mbs*sizeof(PetscInt),&nz);CHKERRQ(ierr); 201 for (i=0; i<mbs; i++) { 202 nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 203 } 204 ierr = MatCreate(B->comm,&Bnew);CHKERRQ(ierr); 205 ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr); 206 ierr = MatSetType(Bnew,B->type_name);CHKERRQ(ierr); 207 ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap.bs,0,nz);CHKERRQ(ierr); 208 ierr = MatSetOption(Bnew,MAT_COLUMN_ORIENTED);CHKERRQ(ierr); 209 210 #if defined(PETSC_USE_MAT_SINGLE) 211 ierr = PetscMalloc(bs2*sizeof(PetscScalar),&atmp);CHKERRQ(ierr); 212 #endif 213 for (i=0; i<mbs; i++) { 214 for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) { 215 col = garray[Bbaij->j[j]]; 216 #if defined(PETSC_USE_MAT_SINGLE) 217 for (k=0; k<bs2; k++) atmp[k] = a[j*bs2+k]; 218 #else 219 atmp = a + j*bs2; 220 #endif 221 ierr = MatSetValuesBlocked_SeqBAIJ(Bnew,1,&i,1,&col,atmp,B->insertmode);CHKERRQ(ierr); 222 } 223 } 224 ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED);CHKERRQ(ierr); 225 226 #if defined(PETSC_USE_MAT_SINGLE) 227 ierr = PetscFree(atmp);CHKERRQ(ierr); 228 #endif 229 230 ierr = PetscFree(nz);CHKERRQ(ierr); 231 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 232 baij->garray = 0; 233 ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr); 234 ierr = MatDestroy(B);CHKERRQ(ierr); 235 ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr); 236 baij->B = Bnew; 237 A->was_assembled = PETSC_FALSE; 238 PetscFunctionReturn(0); 239 } 240 241 /* ugly stuff added for Glenn someday we should fix this up */ 242 243 static PetscInt *uglyrmapd = 0,*uglyrmapo = 0; /* mapping from the local ordering to the "diagonal" and "off-diagonal" 244 parts of the local matrix */ 245 static Vec uglydd = 0,uglyoo = 0; /* work vectors used to scale the two parts of the local matrix */ 246 247 248 #undef __FUNCT__ 249 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocalSetUp" 250 PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale) 251 { 252 Mat_MPIBAIJ *ina = (Mat_MPIBAIJ*) inA->data; /*access private part of matrix */ 253 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)ina->B->data; 254 PetscErrorCode ierr; 255 PetscInt bs = inA->rmap.bs,i,n,nt,j,cstart,cend,no,*garray = ina->garray,*lindices; 256 PetscInt *r_rmapd,*r_rmapo; 257 258 PetscFunctionBegin; 259 ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr); 260 ierr = MatGetSize(ina->A,PETSC_NULL,&n);CHKERRQ(ierr); 261 ierr = PetscMalloc((inA->bmapping->n+1)*sizeof(PetscInt),&r_rmapd);CHKERRQ(ierr); 262 ierr = PetscMemzero(r_rmapd,inA->bmapping->n*sizeof(PetscInt));CHKERRQ(ierr); 263 nt = 0; 264 for (i=0; i<inA->bmapping->n; i++) { 265 if (inA->bmapping->indices[i]*bs >= cstart && inA->bmapping->indices[i]*bs < cend) { 266 nt++; 267 r_rmapd[i] = inA->bmapping->indices[i] + 1; 268 } 269 } 270 if (nt*bs != n) SETERRQ2(PETSC_ERR_PLIB,"Hmm nt*bs %D n %D",nt*bs,n); 271 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&uglyrmapd);CHKERRQ(ierr); 272 for (i=0; i<inA->bmapping->n; i++) { 273 if (r_rmapd[i]){ 274 for (j=0; j<bs; j++) { 275 uglyrmapd[(r_rmapd[i]-1)*bs+j-cstart] = i*bs + j; 276 } 277 } 278 } 279 ierr = PetscFree(r_rmapd);CHKERRQ(ierr); 280 ierr = VecCreateSeq(PETSC_COMM_SELF,n,&uglydd);CHKERRQ(ierr); 281 282 ierr = PetscMalloc((ina->Nbs+1)*sizeof(PetscInt),&lindices);CHKERRQ(ierr); 283 ierr = PetscMemzero(lindices,ina->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 284 for (i=0; i<B->nbs; i++) { 285 lindices[garray[i]] = i+1; 286 } 287 no = inA->bmapping->n - nt; 288 ierr = PetscMalloc((inA->bmapping->n+1)*sizeof(PetscInt),&r_rmapo);CHKERRQ(ierr); 289 ierr = PetscMemzero(r_rmapo,inA->bmapping->n*sizeof(PetscInt));CHKERRQ(ierr); 290 nt = 0; 291 for (i=0; i<inA->bmapping->n; i++) { 292 if (lindices[inA->bmapping->indices[i]]) { 293 nt++; 294 r_rmapo[i] = lindices[inA->bmapping->indices[i]]; 295 } 296 } 297 if (nt > no) SETERRQ2(PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,n); 298 ierr = PetscFree(lindices);CHKERRQ(ierr); 299 ierr = PetscMalloc((nt*bs+1)*sizeof(PetscInt),&uglyrmapo);CHKERRQ(ierr); 300 for (i=0; i<inA->bmapping->n; i++) { 301 if (r_rmapo[i]){ 302 for (j=0; j<bs; j++) { 303 uglyrmapo[(r_rmapo[i]-1)*bs+j] = i*bs + j; 304 } 305 } 306 } 307 ierr = PetscFree(r_rmapo);CHKERRQ(ierr); 308 ierr = VecCreateSeq(PETSC_COMM_SELF,nt*bs,&uglyoo);CHKERRQ(ierr); 309 310 PetscFunctionReturn(0); 311 } 312 313 #undef __FUNCT__ 314 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocal" 315 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJDiagonalScaleLocal(Mat A,Vec scale) 316 { 317 /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */ 318 PetscErrorCode ierr,(*f)(Mat,Vec); 319 320 PetscFunctionBegin; 321 ierr = PetscObjectQueryFunction((PetscObject)A,"MatDiagonalScaleLocal_C",(void (**)(void))&f);CHKERRQ(ierr); 322 if (f) { 323 ierr = (*f)(A,scale);CHKERRQ(ierr); 324 } 325 PetscFunctionReturn(0); 326 } 327 328 EXTERN_C_BEGIN 329 #undef __FUNCT__ 330 #define __FUNCT__ "MatDiagonalScaleLocal_MPIBAIJ" 331 PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat A,Vec scale) 332 { 333 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; /*access private part of matrix */ 334 PetscErrorCode ierr; 335 PetscInt n,i; 336 PetscScalar *d,*o,*s; 337 338 PetscFunctionBegin; 339 if (!uglyrmapd) { 340 ierr = MatMPIBAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr); 341 } 342 343 ierr = VecGetArray(scale,&s);CHKERRQ(ierr); 344 345 ierr = VecGetLocalSize(uglydd,&n);CHKERRQ(ierr); 346 ierr = VecGetArray(uglydd,&d);CHKERRQ(ierr); 347 for (i=0; i<n; i++) { 348 d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ 349 } 350 ierr = VecRestoreArray(uglydd,&d);CHKERRQ(ierr); 351 /* column scale "diagonal" portion of local matrix */ 352 ierr = MatDiagonalScale(a->A,PETSC_NULL,uglydd);CHKERRQ(ierr); 353 354 ierr = VecGetLocalSize(uglyoo,&n);CHKERRQ(ierr); 355 ierr = VecGetArray(uglyoo,&o);CHKERRQ(ierr); 356 for (i=0; i<n; i++) { 357 o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ 358 } 359 ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr); 360 ierr = VecRestoreArray(uglyoo,&o);CHKERRQ(ierr); 361 /* column scale "off-diagonal" portion of local matrix */ 362 ierr = MatDiagonalScale(a->B,PETSC_NULL,uglyoo);CHKERRQ(ierr); 363 364 PetscFunctionReturn(0); 365 } 366 EXTERN_C_END 367 368 369