1 #define PETSCMAT_DLL 2 3 /* 4 Support for the parallel BAIJ matrix vector multiply 5 */ 6 #include "src/mat/impls/baij/mpi/mpibaij.h" 7 8 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 9 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatSetUpMultiply_MPIBAIJ" 12 PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat) 13 { 14 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 15 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(baij->B->data); 16 PetscErrorCode ierr; 17 PetscInt i,j,*aj = B->j,ec = 0,*garray; 18 PetscInt bs = mat->rmap.bs,*stmp; 19 IS from,to; 20 Vec gvec; 21 #if defined (PETSC_USE_CTABLE) 22 PetscTable gid1_lid1; 23 PetscTablePosition tpos; 24 PetscInt gid,lid; 25 #else 26 PetscInt Nbs = baij->Nbs,*indices; 27 #endif 28 29 PetscFunctionBegin; 30 31 #if defined (PETSC_USE_CTABLE) 32 /* use a table - Mark Adams */ 33 ierr = PetscTableCreate(B->mbs,&gid1_lid1);CHKERRQ(ierr); 34 for (i=0; i<B->mbs; i++) { 35 for (j=0; j<B->ilen[i]; j++) { 36 PetscInt data,gid1 = aj[B->i[i]+j] + 1; 37 ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr); 38 if (!data) { 39 /* one based table */ 40 ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr); 41 } 42 } 43 } 44 /* form array of columns we need */ 45 ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 46 ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 47 while (tpos) { 48 ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 49 gid--; lid--; 50 garray[lid] = gid; 51 } 52 ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); 53 ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 54 for (i=0; i<ec; i++) { 55 ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr); 56 } 57 /* compact out the extra columns in B */ 58 for (i=0; i<B->mbs; i++) { 59 for (j=0; j<B->ilen[i]; j++) { 60 PetscInt gid1 = aj[B->i[i] + j] + 1; 61 ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 62 lid --; 63 aj[B->i[i]+j] = lid; 64 } 65 } 66 B->nbs = ec; 67 baij->B->cmap.n = baij->B->cmap.N = ec*mat->rmap.bs; 68 ierr = PetscMapSetUp(&(baij->B->cmap));CHKERRQ(ierr); 69 ierr = PetscTableDestroy(gid1_lid1);CHKERRQ(ierr); 70 /* Mark Adams */ 71 #else 72 /* Make an array as long as the number of columns */ 73 /* mark those columns that are in baij->B */ 74 ierr = PetscMalloc((Nbs+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr); 75 ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr); 76 for (i=0; i<B->mbs; i++) { 77 for (j=0; j<B->ilen[i]; j++) { 78 if (!indices[aj[B->i[i] + j]]) ec++; 79 indices[aj[B->i[i] + j]] = 1; 80 } 81 } 82 83 /* form array of columns we need */ 84 ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 85 ec = 0; 86 for (i=0; i<Nbs; i++) { 87 if (indices[i]) { 88 garray[ec++] = i; 89 } 90 } 91 92 /* make indices now point into garray */ 93 for (i=0; i<ec; i++) { 94 indices[garray[i]] = i; 95 } 96 97 /* compact out the extra columns in B */ 98 for (i=0; i<B->mbs; i++) { 99 for (j=0; j<B->ilen[i]; j++) { 100 aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 101 } 102 } 103 B->nbs = ec; 104 baij->B->cmap.n =baij->B->cmap.N = ec*mat->rmap.bs; 105 ierr = PetscMapSetUp(&(baij->B->cmap));CHKERRQ(ierr); 106 ierr = PetscFree(indices);CHKERRQ(ierr); 107 #endif 108 109 /* create local vector that is used to scatter into */ 110 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr); 111 112 /* create two temporary index sets for building scatter-gather */ 113 for (i=0; i<ec; i++) { 114 garray[i] = bs*garray[i]; 115 } 116 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr); 117 for (i=0; i<ec; i++) { 118 garray[i] = garray[i]/bs; 119 } 120 121 ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&stmp);CHKERRQ(ierr); 122 for (i=0; i<ec; i++) { stmp[i] = bs*i; } 123 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr); 124 ierr = PetscFree(stmp);CHKERRQ(ierr); 125 126 /* create temporary global vector to generate scatter context */ 127 ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,mat->cmap.n,mat->cmap.N,PETSC_NULL,&gvec);CHKERRQ(ierr); 128 129 ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr); 130 131 ierr = PetscLogObjectParent(mat,baij->Mvctx);CHKERRQ(ierr); 132 ierr = PetscLogObjectParent(mat,baij->lvec);CHKERRQ(ierr); 133 ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr); 134 ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr); 135 baij->garray = garray; 136 ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr); 137 ierr = ISDestroy(from);CHKERRQ(ierr); 138 ierr = ISDestroy(to);CHKERRQ(ierr); 139 ierr = VecDestroy(gvec);CHKERRQ(ierr); 140 PetscFunctionReturn(0); 141 } 142 143 /* 144 Takes the local part of an already assembled MPIBAIJ matrix 145 and disassembles it. This is to allow new nonzeros into the matrix 146 that require more communication in the matrix vector multiply. 147 Thus certain data-structures must be rebuilt. 148 149 Kind of slow! But that's what application programmers get when 150 they are sloppy. 151 */ 152 #undef __FUNCT__ 153 #define __FUNCT__ "DisAssemble_MPIBAIJ" 154 PetscErrorCode DisAssemble_MPIBAIJ(Mat A) 155 { 156 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 157 Mat B = baij->B,Bnew; 158 Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 159 PetscErrorCode ierr; 160 PetscInt i,j,mbs=Bbaij->mbs,n = A->cmap.N,col,*garray=baij->garray; 161 PetscInt bs2 = baij->bs2,*nz,ec,m = A->rmap.n; 162 MatScalar *a = Bbaij->a; 163 PetscScalar *atmp; 164 #if defined(PETSC_USE_MAT_SINGLE) 165 PetscInt k; 166 #endif 167 168 PetscFunctionBegin; 169 /* free stuff related to matrix-vec multiply */ 170 ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 171 ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0; 172 ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0; 173 if (baij->colmap) { 174 #if defined (PETSC_USE_CTABLE) 175 ierr = PetscTableDestroy(baij->colmap); baij->colmap = 0;CHKERRQ(ierr); 176 #else 177 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 178 baij->colmap = 0; 179 ierr = PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr); 180 #endif 181 } 182 183 /* make sure that B is assembled so we can access its values */ 184 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 185 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 186 187 /* invent new B and copy stuff over */ 188 ierr = PetscMalloc(mbs*sizeof(PetscInt),&nz);CHKERRQ(ierr); 189 for (i=0; i<mbs; i++) { 190 nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 191 } 192 ierr = MatCreate(((PetscObject)B)->comm,&Bnew);CHKERRQ(ierr); 193 ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr); 194 ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr); 195 ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap.bs,0,nz);CHKERRQ(ierr); 196 ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 197 198 #if defined(PETSC_USE_MAT_SINGLE) 199 ierr = PetscMalloc(bs2*sizeof(PetscScalar),&atmp);CHKERRQ(ierr); 200 #endif 201 for (i=0; i<mbs; i++) { 202 for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) { 203 col = garray[Bbaij->j[j]]; 204 #if defined(PETSC_USE_MAT_SINGLE) 205 for (k=0; k<bs2; k++) atmp[k] = a[j*bs2+k]; 206 #else 207 atmp = a + j*bs2; 208 #endif 209 ierr = MatSetValuesBlocked_SeqBAIJ(Bnew,1,&i,1,&col,atmp,B->insertmode);CHKERRQ(ierr); 210 } 211 } 212 ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 213 214 #if defined(PETSC_USE_MAT_SINGLE) 215 ierr = PetscFree(atmp);CHKERRQ(ierr); 216 #endif 217 218 ierr = PetscFree(nz);CHKERRQ(ierr); 219 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 220 baij->garray = 0; 221 ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr); 222 ierr = MatDestroy(B);CHKERRQ(ierr); 223 ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr); 224 baij->B = Bnew; 225 A->was_assembled = PETSC_FALSE; 226 PetscFunctionReturn(0); 227 } 228 229 /* ugly stuff added for Glenn someday we should fix this up */ 230 231 static PetscInt *uglyrmapd = 0,*uglyrmapo = 0; /* mapping from the local ordering to the "diagonal" and "off-diagonal" 232 parts of the local matrix */ 233 static Vec uglydd = 0,uglyoo = 0; /* work vectors used to scale the two parts of the local matrix */ 234 235 236 #undef __FUNCT__ 237 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocalSetUp" 238 PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale) 239 { 240 Mat_MPIBAIJ *ina = (Mat_MPIBAIJ*) inA->data; /*access private part of matrix */ 241 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)ina->B->data; 242 PetscErrorCode ierr; 243 PetscInt bs = inA->rmap.bs,i,n,nt,j,cstart,cend,no,*garray = ina->garray,*lindices; 244 PetscInt *r_rmapd,*r_rmapo; 245 246 PetscFunctionBegin; 247 ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr); 248 ierr = MatGetSize(ina->A,PETSC_NULL,&n);CHKERRQ(ierr); 249 ierr = PetscMalloc((inA->bmapping->n+1)*sizeof(PetscInt),&r_rmapd);CHKERRQ(ierr); 250 ierr = PetscMemzero(r_rmapd,inA->bmapping->n*sizeof(PetscInt));CHKERRQ(ierr); 251 nt = 0; 252 for (i=0; i<inA->bmapping->n; i++) { 253 if (inA->bmapping->indices[i]*bs >= cstart && inA->bmapping->indices[i]*bs < cend) { 254 nt++; 255 r_rmapd[i] = inA->bmapping->indices[i] + 1; 256 } 257 } 258 if (nt*bs != n) SETERRQ2(PETSC_ERR_PLIB,"Hmm nt*bs %D n %D",nt*bs,n); 259 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&uglyrmapd);CHKERRQ(ierr); 260 for (i=0; i<inA->bmapping->n; i++) { 261 if (r_rmapd[i]){ 262 for (j=0; j<bs; j++) { 263 uglyrmapd[(r_rmapd[i]-1)*bs+j-cstart] = i*bs + j; 264 } 265 } 266 } 267 ierr = PetscFree(r_rmapd);CHKERRQ(ierr); 268 ierr = VecCreateSeq(PETSC_COMM_SELF,n,&uglydd);CHKERRQ(ierr); 269 270 ierr = PetscMalloc((ina->Nbs+1)*sizeof(PetscInt),&lindices);CHKERRQ(ierr); 271 ierr = PetscMemzero(lindices,ina->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 272 for (i=0; i<B->nbs; i++) { 273 lindices[garray[i]] = i+1; 274 } 275 no = inA->bmapping->n - nt; 276 ierr = PetscMalloc((inA->bmapping->n+1)*sizeof(PetscInt),&r_rmapo);CHKERRQ(ierr); 277 ierr = PetscMemzero(r_rmapo,inA->bmapping->n*sizeof(PetscInt));CHKERRQ(ierr); 278 nt = 0; 279 for (i=0; i<inA->bmapping->n; i++) { 280 if (lindices[inA->bmapping->indices[i]]) { 281 nt++; 282 r_rmapo[i] = lindices[inA->bmapping->indices[i]]; 283 } 284 } 285 if (nt > no) SETERRQ2(PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,n); 286 ierr = PetscFree(lindices);CHKERRQ(ierr); 287 ierr = PetscMalloc((nt*bs+1)*sizeof(PetscInt),&uglyrmapo);CHKERRQ(ierr); 288 for (i=0; i<inA->bmapping->n; i++) { 289 if (r_rmapo[i]){ 290 for (j=0; j<bs; j++) { 291 uglyrmapo[(r_rmapo[i]-1)*bs+j] = i*bs + j; 292 } 293 } 294 } 295 ierr = PetscFree(r_rmapo);CHKERRQ(ierr); 296 ierr = VecCreateSeq(PETSC_COMM_SELF,nt*bs,&uglyoo);CHKERRQ(ierr); 297 298 PetscFunctionReturn(0); 299 } 300 301 #undef __FUNCT__ 302 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocal" 303 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJDiagonalScaleLocal(Mat A,Vec scale) 304 { 305 /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */ 306 PetscErrorCode ierr,(*f)(Mat,Vec); 307 308 PetscFunctionBegin; 309 ierr = PetscObjectQueryFunction((PetscObject)A,"MatDiagonalScaleLocal_C",(void (**)(void))&f);CHKERRQ(ierr); 310 if (f) { 311 ierr = (*f)(A,scale);CHKERRQ(ierr); 312 } 313 PetscFunctionReturn(0); 314 } 315 316 EXTERN_C_BEGIN 317 #undef __FUNCT__ 318 #define __FUNCT__ "MatDiagonalScaleLocal_MPIBAIJ" 319 PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat A,Vec scale) 320 { 321 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; /*access private part of matrix */ 322 PetscErrorCode ierr; 323 PetscInt n,i; 324 PetscScalar *d,*o,*s; 325 326 PetscFunctionBegin; 327 if (!uglyrmapd) { 328 ierr = MatMPIBAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr); 329 } 330 331 ierr = VecGetArray(scale,&s);CHKERRQ(ierr); 332 333 ierr = VecGetLocalSize(uglydd,&n);CHKERRQ(ierr); 334 ierr = VecGetArray(uglydd,&d);CHKERRQ(ierr); 335 for (i=0; i<n; i++) { 336 d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ 337 } 338 ierr = VecRestoreArray(uglydd,&d);CHKERRQ(ierr); 339 /* column scale "diagonal" portion of local matrix */ 340 ierr = MatDiagonalScale(a->A,PETSC_NULL,uglydd);CHKERRQ(ierr); 341 342 ierr = VecGetLocalSize(uglyoo,&n);CHKERRQ(ierr); 343 ierr = VecGetArray(uglyoo,&o);CHKERRQ(ierr); 344 for (i=0; i<n; i++) { 345 o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ 346 } 347 ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr); 348 ierr = VecRestoreArray(uglyoo,&o);CHKERRQ(ierr); 349 /* column scale "off-diagonal" portion of local matrix */ 350 ierr = MatDiagonalScale(a->B,PETSC_NULL,uglyoo);CHKERRQ(ierr); 351 352 PetscFunctionReturn(0); 353 } 354 EXTERN_C_END 355 356 357