#define PETSCMAT_DLL

/*
   Support for the parallel BAIJ matrix-vector multiply
*/
#include "src/mat/impls/baij/mpi/mpibaij.h"

EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);

#undef __FUNCT__
#define __FUNCT__ "MatSetUpMultiply_MPIBAIJ"
PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ        *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ        *B = (Mat_SeqBAIJ*)(baij->B->data);
  PetscErrorCode     ierr;
  PetscInt           i,j,*aj = B->j,ec = 0,*garray;
  PetscInt           bs = mat->bs,*stmp;
  IS                 from,to;
  Vec                gvec;
#if defined (PETSC_USE_CTABLE)
  PetscTable         gid1_lid1;
  PetscTablePosition tpos;
  PetscInt           gid,lid;
#else
  PetscInt           Nbs = baij->Nbs,*indices;
#endif

  PetscFunctionBegin;

#if defined (PETSC_USE_CTABLE)
  /* use a table - Mark Adams */
  ierr = PetscTableCreate(B->mbs,&gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt data,gid1 = aj[B->i[i]+j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
      if (!data) {
        /* one based table */
        ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
      }
    }
  }
  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
  while (tpos) {
    ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
    gid--; lid--;
    garray[lid] = gid;
  }
  ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
  ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
  }
  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
      lid--;
      aj[B->i[i]+j] = lid;
    }
  }
  B->nbs     = ec;
  baij->B->n = ec*mat->bs;
  ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr);
  /* Mark Adams */
#else
  /* Make an array as long as the number of columns */
  /* mark those columns that are in baij->B */
  ierr = PetscMalloc((Nbs+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ec   = 0;
  for (i=0; i<Nbs; i++) {
    if (indices[i]) {
      garray[ec++] = i;
    }
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) {
    indices[garray[i]] = i;
  }

  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
    }
  }
  B->nbs     = ec;
  baij->B->n = ec*mat->bs;
  ierr = PetscFree(indices);CHKERRQ(ierr);
#endif

  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building scatter-gather */
  for (i=0; i<ec; i++) {
    garray[i] = bs*garray[i];
  }
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    garray[i] = garray[i]/bs;
  }

  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&stmp);CHKERRQ(ierr);
  for (i=0; i<ec; i++) { stmp[i] = bs*i; }
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
  ierr = PetscFree(stmp);CHKERRQ(ierr);

  /* create temporary global vector to generate scatter context */
  /* this is inefficient, but otherwise we must either
     1) save garray until the first actual scatter when the vector is known or
     2) have another way of generating a scatter context without a vector */
  ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr);

  /* generate the scatter context */
  ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);

  /*
     Post the receives for the first matrix-vector product. We synchronize after
     this on the chance that the user immediately calls MatMult() after assembling
     the matrix.
  */
  ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
  ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);

  ierr = PetscLogObjectParent(mat,baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,baij->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
  baij->garray = garray;
  ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = ISDestroy(from);CHKERRQ(ierr);
  ierr = ISDestroy(to);CHKERRQ(ierr);
  ierr = VecDestroy(gvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
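/*
   Illustrative sketch (kept under "#if 0", not compiled): the column
   compaction used above, restated on plain arrays. The helper name
   CompactColumns is hypothetical and the body assumes <stdlib.h>; it
   mirrors the #else branch above: mark the global block columns that
   occur, list them in garray, then renumber the column indices to the
   local range 0..ec-1.
*/
#if 0
static PetscInt CompactColumns(PetscInt Nbs,PetscInt nnz,PetscInt aj[],PetscInt **garray_out)
{
  PetscInt i,ec = 0,*indices,*garray;

  indices = (PetscInt*)calloc((size_t)Nbs,sizeof(PetscInt));
  for (i=0; i<nnz; i++) {                        /* mark each global block column that occurs */
    if (!indices[aj[i]]) ec++;
    indices[aj[i]] = 1;
  }
  garray = (PetscInt*)malloc((size_t)(ec+1)*sizeof(PetscInt));
  ec = 0;
  for (i=0; i<Nbs; i++) {                        /* garray[local] = global, in sorted order */
    if (indices[i]) garray[ec++] = i;
  }
  for (i=0; i<ec; i++) indices[garray[i]] = i;   /* invert the map: global -> local */
  for (i=0; i<nnz; i++) aj[i] = indices[aj[i]];  /* renumber the column indices in place */
  free(indices);
  *garray_out = garray;
  return ec;                                     /* number of distinct ghost block columns */
}
#endif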
/*
     Takes the local part of an already assembled MPIBAIJ matrix
   and disassembles it. This is to allow new nonzeros into the matrix
   that require more communication in the matrix-vector multiply.
   Thus certain data-structures must be rebuilt.

   Kind of slow! But that's what application programmers get when
   they are sloppy.
*/
#undef __FUNCT__
#define __FUNCT__ "DisAssemble_MPIBAIJ"
PetscErrorCode DisAssemble_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
  Mat            B = baij->B,Bnew;
  Mat_SeqBAIJ    *Bbaij = (Mat_SeqBAIJ*)B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,mbs=Bbaij->mbs,n = A->N,col,*garray=baij->garray;
  PetscInt       bs2 = baij->bs2,*nz,ec,m = A->m;
  MatScalar      *a = Bbaij->a;
  PetscScalar    *atmp;
#if defined(PETSC_USE_MAT_SINGLE)
  PetscInt       k;
#endif

  PetscFunctionBegin;
  /* free stuff related to matrix-vector multiply */
  ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
  ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0;
  ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0;
  if (baij->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);
    baij->colmap = 0;
#else
    ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
    baij->colmap = 0;
    ierr = PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  }

  /* make sure that B is assembled so we can access its values */
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* invent new B and copy stuff over */
  ierr = PetscMalloc(mbs*sizeof(PetscInt),&nz);CHKERRQ(ierr);
  for (i=0; i<mbs; i++) {
    nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
  }
  ierr = MatCreate(B->comm,m,n,m,n,&Bnew);CHKERRQ(ierr);
  ierr = MatSetType(Bnew,B->type_name);CHKERRQ(ierr);
  ierr = MatSeqBAIJSetPreallocation(Bnew,B->bs,0,nz);CHKERRQ(ierr);
  ierr = MatSetOption(Bnew,MAT_COLUMN_ORIENTED);CHKERRQ(ierr);

#if defined(PETSC_USE_MAT_SINGLE)
  ierr = PetscMalloc(bs2*sizeof(PetscScalar),&atmp);CHKERRQ(ierr);
#endif
  for (i=0; i<mbs; i++) {
    for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
      col = garray[Bbaij->j[j]];
#if defined(PETSC_USE_MAT_SINGLE)
      for (k=0; k<bs2; k++) atmp[k] = a[j*bs2+k];
#else
      atmp = a + j*bs2;
#endif
      ierr = MatSetValuesBlocked_SeqBAIJ(Bnew,1,&i,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
    }
  }
  ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED);CHKERRQ(ierr);

#if defined(PETSC_USE_MAT_SINGLE)
  ierr = PetscFree(atmp);CHKERRQ(ierr);
#endif

  ierr = PetscFree(nz);CHKERRQ(ierr);
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  baij->garray = 0;
  ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatDestroy(B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr);
  baij->B = Bnew;
  A->was_assembled = PETSC_FALSE;
  PetscFunctionReturn(0);
}
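/*
   Worked example (illustrative only): if the compacted off-diagonal part
   stores local block column indices {0,1,2} and garray = {2,5,9}, then
   during disassembly the loop above re-expands local column 1 to global
   block column garray[1] = 5 before the values are inserted into Bnew,
   i.e. col = garray[Bbaij->j[j]], so the new B again carries true global
   block column indices and MatSetUpMultiply_MPIBAIJ() can recompact it
   after the new nonzeros have been added.
*/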
/* ugly stuff added for Glenn; someday we should fix this up */

static PetscInt *uglyrmapd = 0,*uglyrmapo = 0; /* mapping from the local ordering to the "diagonal" and "off-diagonal"
                                                  parts of the local matrix */
static Vec      uglydd = 0,uglyoo = 0;         /* work vectors used to scale the two parts of the local matrix */

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJDiagonalScaleLocalSetUp"
PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale)
{
  Mat_MPIBAIJ    *ina = (Mat_MPIBAIJ*) inA->data; /* access private part of matrix */
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)ina->B->data;
  PetscErrorCode ierr;
  PetscInt       bs = inA->bs,i,n,nt,j,cstart,cend,no,*garray = ina->garray,*lindices;
  PetscInt       *r_rmapd,*r_rmapo;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr);
  ierr = MatGetSize(ina->A,PETSC_NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc((inA->bmapping->n+1)*sizeof(PetscInt),&r_rmapd);CHKERRQ(ierr);
  ierr = PetscMemzero(r_rmapd,inA->bmapping->n*sizeof(PetscInt));CHKERRQ(ierr);
  nt   = 0;
  for (i=0; i<inA->bmapping->n; i++) {
    if (inA->bmapping->indices[i]*bs >= cstart && inA->bmapping->indices[i]*bs < cend) {
      nt++;
      r_rmapd[i] = inA->bmapping->indices[i] + 1;
    }
  }
  if (nt*bs != n) SETERRQ2(PETSC_ERR_PLIB,"Hmm nt*bs %D n %D",nt*bs,n);
  ierr = PetscMalloc((n+1)*sizeof(PetscInt),&uglyrmapd);CHKERRQ(ierr);
  for (i=0; i<inA->bmapping->n; i++) {
    if (r_rmapd[i]) {
      for (j=0; j<bs; j++) {
        uglyrmapd[(r_rmapd[i]-1)*bs+j-cstart] = i*bs + j;
      }
    }
  }
  ierr = PetscFree(r_rmapd);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF,n,&uglydd);CHKERRQ(ierr);

  ierr = PetscMalloc((ina->Nbs+1)*sizeof(PetscInt),&lindices);CHKERRQ(ierr);
  ierr = PetscMemzero(lindices,ina->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<B->nbs; i++) {
    lindices[garray[i]] = i+1;
  }
  no   = inA->bmapping->n - nt;
  ierr = PetscMalloc((inA->bmapping->n+1)*sizeof(PetscInt),&r_rmapo);CHKERRQ(ierr);
  ierr = PetscMemzero(r_rmapo,inA->bmapping->n*sizeof(PetscInt));CHKERRQ(ierr);
  nt   = 0;
  for (i=0; i<inA->bmapping->n; i++) {
    if (lindices[inA->bmapping->indices[i]]) {
      nt++;
      r_rmapo[i] = lindices[inA->bmapping->indices[i]];
    }
  }
  if (nt > no) SETERRQ2(PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,no);
  ierr = PetscFree(lindices);CHKERRQ(ierr);
  ierr = PetscMalloc((nt*bs+1)*sizeof(PetscInt),&uglyrmapo);CHKERRQ(ierr);
  for (i=0; i<inA->bmapping->n; i++) {
    if (r_rmapo[i]) {
      for (j=0; j<bs; j++) {
        uglyrmapo[(r_rmapo[i]-1)*bs+j] = i*bs + j;
      }
    }
  }
  ierr = PetscFree(r_rmapo);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF,nt*bs,&uglyoo);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
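/*
   Illustrative note (a sketch of the invariant, not authoritative): after
   this setup, uglyrmapd[k] holds, for point k of the "diagonal" block
   ina->A, the position of that point in the caller's local scale vector
   (the block ordering of inA->bmapping expanded by the block size bs), and
   uglyrmapo[k] does the same for the "off-diagonal" block ina->B. For
   example, with bs = 2, entries i*bs and i*bs+1 of the scale vector belong
   to local block i of the mapping, and each lands in either uglydd or
   uglyoo depending on whether that block is owned by this process.
*/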
#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJDiagonalScaleLocal"
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJDiagonalScaleLocal(Mat A,Vec scale)
{
  /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */
  PetscErrorCode ierr,(*f)(Mat,Vec);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)A,"MatDiagonalScaleLocal_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(A,scale);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
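/*
   Usage sketch (hypothetical caller; assumes a block local-to-global
   mapping has already been attached to A, e.g. with
   MatSetLocalToGlobalMappingBlock(), so that A->bmapping is set):

     Vec scale;
     VecCreateSeq(PETSC_COMM_SELF,nlocal,&scale);   -- one entry per local point of A
     -- fill scale with the desired column scaling in the local ordering
     ierr = MatMPIBAIJDiagonalScaleLocal(A,scale);CHKERRQ(ierr);

   The query-function indirection above dispatches to
   MatDiagonalScaleLocal_MPIBAIJ() below when A is of type MPIBAIJ.
*/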
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScaleLocal_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat A,Vec scale)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data; /* access private part of matrix */
  PetscErrorCode ierr;
  PetscInt       n,i;
  PetscScalar    *d,*o,*s;

  PetscFunctionBegin;
  if (!uglyrmapd) {
    ierr = MatMPIBAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr);
  }

  ierr = VecGetArray(scale,&s);CHKERRQ(ierr);

  ierr = VecGetLocalSize(uglydd,&n);CHKERRQ(ierr);
  ierr = VecGetArray(uglydd,&d);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */
  }
  ierr = VecRestoreArray(uglydd,&d);CHKERRQ(ierr);
  /* column scale "diagonal" portion of local matrix */
  ierr = MatDiagonalScale(a->A,PETSC_NULL,uglydd);CHKERRQ(ierr);

  ierr = VecGetLocalSize(uglyoo,&n);CHKERRQ(ierr);
  ierr = VecGetArray(uglyoo,&o);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
  }
  ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr);
  ierr = VecRestoreArray(uglyoo,&o);CHKERRQ(ierr);
  /* column scale "off-diagonal" portion of local matrix */
  ierr = MatDiagonalScale(a->B,PETSC_NULL,uglyoo);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END