/*
   Support for the parallel BAIJ matrix vector multiply
*/
#include <../src/mat/impls/baij/mpi/mpibaij.h>
#include <petsc/private/isimpl.h> /* needed because accesses data structure of ISLocalToGlobalMapping directly */

PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)(baij->B->data);
  PetscInt    i,j,*aj = B->j,ec = 0,*garray;
  PetscInt    bs = mat->rmap->bs,*stmp;
  IS          from,to;
  Vec         gvec;
#if defined(PETSC_USE_CTABLE)
  PetscTable         gid1_lid1;
  PetscTablePosition tpos;
  PetscInt           gid,lid;
#else
  PetscInt Nbs = baij->Nbs,*indices;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  /* use a table - Mark Adams */
  PetscCall(PetscTableCreate(B->mbs,baij->Nbs+1,&gid1_lid1));
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt data,gid1 = aj[B->i[i]+j] + 1;
      PetscCall(PetscTableFind(gid1_lid1,gid1,&data));
      if (!data) {
        /* one based table */
        PetscCall(PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES));
      }
    }
  }
  /* form array of columns we need */
  PetscCall(PetscMalloc1(ec,&garray));
  PetscCall(PetscTableGetHeadPosition(gid1_lid1,&tpos));
  while (tpos) {
    PetscCall(PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid));
    gid--; lid--;
    garray[lid] = gid;
  }
  PetscCall(PetscSortInt(ec,garray));
  PetscCall(PetscTableRemoveAll(gid1_lid1));
  for (i=0; i<ec; i++) {
    PetscCall(PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES));
  }
  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      PetscCall(PetscTableFind(gid1_lid1,gid1,&lid));
      lid--;
      aj[B->i[i]+j] = lid;
    }
  }
  B->nbs = ec;
  PetscCall(PetscLayoutDestroy(&baij->B->cmap));
  PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)baij->B),ec*mat->rmap->bs,ec*mat->rmap->bs,mat->rmap->bs,&baij->B->cmap));
  PetscCall(PetscTableDestroy(&gid1_lid1));
#else
  /* Make an array as long as the number of columns */
  /* mark those columns that are in baij->B */
  PetscCall(PetscCalloc1(Nbs,&indices));
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form array of columns we need */
  PetscCall(PetscMalloc1(ec,&garray));
  ec = 0;
  for (i=0; i<Nbs; i++) {
    if (indices[i]) {
      garray[ec++] = i;
    }
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) {
    indices[garray[i]] = i;
  }

  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
    }
  }
  B->nbs = ec;
  PetscCall(PetscLayoutDestroy(&baij->B->cmap));
  PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)baij->B),ec*mat->rmap->bs,ec*mat->rmap->bs,mat->rmap->bs,&baij->B->cmap));
  PetscCall(PetscFree(indices));
#endif

  /* create local vector that is used to scatter into */
  PetscCall(VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec));

  /* create two temporary index sets for building scatter-gather */
  PetscCall(ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from));

  PetscCall(PetscMalloc1(ec,&stmp));
  for (i=0; i<ec; i++) stmp[i] = i;
  PetscCall(ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_OWN_POINTER,&to));

  /* create temporary global vector to generate scatter context */
  PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec));

  PetscCall(VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx));
  PetscCall(VecScatterViewFromOptions(baij->Mvctx,(PetscObject)mat,"-matmult_vecscatter_view"));

  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)baij->Mvctx));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)baij->lvec));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)from));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)to));

  baij->garray = garray;

  PetscCall(PetscLogObjectMemory((PetscObject)mat,ec*sizeof(PetscInt)));
  PetscCall(ISDestroy(&from));
  PetscCall(ISDestroy(&to));
  PetscCall(VecDestroy(&gvec));
  PetscFunctionReturn(0);
}
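
/*
   Usage sketch (illustrative only; A, x, y are placeholder names): this routine is
   called internally at the end of parallel assembly of an MPIBAIJ matrix, so the
   communication pattern is already in place by the time MatMult() runs:

     PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); // builds baij->lvec, baij->Mvctx, baij->garray
     PetscCall(MatMult(A,x,y));                       // scatters the needed ghost entries of x into
                                                      // baij->lvec, then applies the diagonal (A) and
                                                      // off-diagonal (B) local blocks
*/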

/*
   Takes the local part of an already assembled MPIBAIJ matrix
   and disassembles it. This is to allow new nonzeros into the matrix
   that require more communication in the matrix vector multiply.
   Thus certain data-structures must be rebuilt.

   Kind of slow! But that's what application programmers get when
   they are sloppy.
*/
PetscErrorCode MatDisAssemble_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *baij  = (Mat_MPIBAIJ*)A->data;
  Mat         B      = baij->B,Bnew;
  Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data;
  PetscInt    i,j,mbs = Bbaij->mbs,n = A->cmap->N,col,*garray = baij->garray;
  PetscInt    bs2 = baij->bs2,*nz,ec,m = A->rmap->n;
  MatScalar   *a = Bbaij->a;
  MatScalar   *atmp;

  PetscFunctionBegin;
  /* free stuff related to matrix-vec multiply */
  PetscCall(VecGetSize(baij->lvec,&ec)); /* needed for PetscLogObjectMemory below */
  PetscCall(VecDestroy(&baij->lvec)); baij->lvec = NULL;
  PetscCall(VecScatterDestroy(&baij->Mvctx)); baij->Mvctx = NULL;
  if (baij->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscTableDestroy(&baij->colmap));
#else
    PetscCall(PetscFree(baij->colmap));
    PetscCall(PetscLogObjectMemory((PetscObject)A,-Bbaij->nbs*sizeof(PetscInt)));
#endif
  }

  /* make sure that B is assembled so we can access its values */
  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));

  /* invent new B and copy stuff over */
  PetscCall(PetscMalloc1(mbs,&nz));
  for (i=0; i<mbs; i++) {
    nz[i] = Bbaij->i[i+1] - Bbaij->i[i];
  }
  PetscCall(MatCreate(PetscObjectComm((PetscObject)B),&Bnew));
  PetscCall(MatSetSizes(Bnew,m,n,m,n));
  PetscCall(MatSetType(Bnew,((PetscObject)B)->type_name));
  PetscCall(MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz));
  if (Bbaij->nonew >= 0) { /* Inherit insertion error options (if nonnegative). */
    ((Mat_SeqBAIJ*)Bnew->data)->nonew = Bbaij->nonew;
  }

  PetscCall(MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_FALSE));
  /*
    Ensure that B's nonzerostate is monotonically increasing.
    Or should this follow the MatSetValuesBlocked() loop to preserve B's nonzerostate across a MatDisAssemble() call?
  */
  Bnew->nonzerostate = B->nonzerostate;

  for (i=0; i<mbs; i++) {
    for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
      col  = garray[Bbaij->j[j]];
      atmp = a + j*bs2;
      PetscCall(MatSetValuesBlocked_SeqBAIJ(Bnew,1,&i,1,&col,atmp,B->insertmode));
    }
  }
  PetscCall(MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_TRUE));

  PetscCall(PetscFree(nz));
  PetscCall(PetscFree(baij->garray));
  PetscCall(PetscLogObjectMemory((PetscObject)A,-ec*sizeof(PetscInt)));
  PetscCall(MatDestroy(&B));
  PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)Bnew));

  baij->B          = Bnew;
  A->was_assembled = PETSC_FALSE;
  A->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}
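
/*
   Illustrative trigger for the routine above (placeholder names; a sketch, not a
   prescription): inserting into an already assembled matrix at an off-process
   column block that was not previously communicated forces a disassemble/reassemble
   cycle, which is why such insertions are expensive:

     PetscCall(MatSetValuesBlocked(A,1,&row,1,&newcol,vals,ADD_VALUES)); // may call MatDisAssemble_MPIBAIJ()
                                                                         // internally for a new off-diagonal block
     PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); // rebuilds the scatter via MatSetUpMultiply_MPIBAIJ()
*/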

/* ugly stuff added for Glenn; someday we should fix this up */

static PetscInt *uglyrmapd = NULL,*uglyrmapo = NULL; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */
static Vec      uglydd = NULL,uglyoo = NULL;         /* work vectors used to scale the two parts of the local matrix */

PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale)
{
  Mat_MPIBAIJ *ina = (Mat_MPIBAIJ*) inA->data; /* access private part of matrix */
  Mat_SeqBAIJ *B   = (Mat_SeqBAIJ*)ina->B->data;
  PetscInt    bs = inA->rmap->bs,i,n,nt,j,cstart,cend,no,*garray = ina->garray,*lindices;
  PetscInt    *r_rmapd,*r_rmapo;

  PetscFunctionBegin;
  PetscCall(MatGetOwnershipRange(inA,&cstart,&cend));
  PetscCall(MatGetSize(ina->A,NULL,&n));
  PetscCall(PetscCalloc1(inA->rmap->mapping->n+1,&r_rmapd));
  nt = 0;
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (inA->rmap->mapping->indices[i]*bs >= cstart && inA->rmap->mapping->indices[i]*bs < cend) {
      nt++;
      r_rmapd[i] = inA->rmap->mapping->indices[i] + 1;
    }
  }
  PetscCheckFalse(nt*bs != n,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt*bs %" PetscInt_FMT " n %" PetscInt_FMT,nt*bs,n);
  PetscCall(PetscMalloc1(n+1,&uglyrmapd));
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (r_rmapd[i]) {
      for (j=0; j<bs; j++) {
        uglyrmapd[(r_rmapd[i]-1)*bs+j-cstart] = i*bs + j;
      }
    }
  }
  PetscCall(PetscFree(r_rmapd));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF,n,&uglydd));

  PetscCall(PetscCalloc1(ina->Nbs+1,&lindices));
  for (i=0; i<B->nbs; i++) {
    lindices[garray[i]] = i+1;
  }
  no = inA->rmap->mapping->n - nt;
  PetscCall(PetscCalloc1(inA->rmap->mapping->n+1,&r_rmapo));
  nt = 0;
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (lindices[inA->rmap->mapping->indices[i]]) {
      nt++;
      r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]];
    }
  }
  PetscCheckFalse(nt > no,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %" PetscInt_FMT " no %" PetscInt_FMT,nt,no);
  PetscCall(PetscFree(lindices));
  PetscCall(PetscMalloc1(nt*bs+1,&uglyrmapo));
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (r_rmapo[i]) {
      for (j=0; j<bs; j++) {
        uglyrmapo[(r_rmapo[i]-1)*bs+j] = i*bs + j;
      }
    }
  }
  PetscCall(PetscFree(r_rmapo));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF,nt*bs,&uglyoo));
  PetscFunctionReturn(0);
}
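
/*
   A minimal sketch of the invariant established above (see
   MatDiagonalScaleLocal_MPIBAIJ() below for the consumer): with s[] the array of
   the local scale vector in the ISLocalToGlobalMapping ordering,

     dd[i] = s[uglyrmapd[i]],  i = 0,...,n-1       gathers the locally owned points
     oo[i] = s[uglyrmapo[i]],  i = 0,...,nt*bs-1   gathers the ghost points

   so that dd (uglydd) and oo (uglyoo) can column-scale ina->A and ina->B respectively.
*/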

PetscErrorCode MatMPIBAIJDiagonalScaleLocal(Mat A,Vec scale)
{
  /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */

  PetscFunctionBegin;
  PetscTryMethod(A,"MatDiagonalScaleLocal_C",(Mat,Vec),(A,scale));
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat A,Vec scale)
{
  Mat_MPIBAIJ       *a = (Mat_MPIBAIJ*) A->data; /* access private part of matrix */
  PetscInt          n,i;
  PetscScalar       *d,*o;
  const PetscScalar *s;

  PetscFunctionBegin;
  if (!uglyrmapd) {
    PetscCall(MatMPIBAIJDiagonalScaleLocalSetUp(A,scale));
  }

  PetscCall(VecGetArrayRead(scale,&s));

  PetscCall(VecGetLocalSize(uglydd,&n));
  PetscCall(VecGetArray(uglydd,&d));
  for (i=0; i<n; i++) {
    d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */
  }
  PetscCall(VecRestoreArray(uglydd,&d));
  /* column scale "diagonal" portion of local matrix */
  PetscCall(MatDiagonalScale(a->A,NULL,uglydd));

  PetscCall(VecGetLocalSize(uglyoo,&n));
  PetscCall(VecGetArray(uglyoo,&o));
  for (i=0; i<n; i++) {
    o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
  }
  PetscCall(VecRestoreArrayRead(scale,&s));
  PetscCall(VecRestoreArray(uglyoo,&o));
  /* column scale "off-diagonal" portion of local matrix */
  PetscCall(MatDiagonalScale(a->B,NULL,uglyoo));
  PetscFunctionReturn(0);
}
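
/*
   Usage sketch (placeholder names; assumes the matrix has a local-to-global mapping
   set, e.g. via MatSetLocalToGlobalMapping()): scale is a sequential vector holding
   one factor per local (owned plus ghost) point, in the local ordering used by that
   mapping:

     Vec scale; // created by the caller, length inA->rmap->mapping->n * bs
     PetscCall(MatMPIBAIJDiagonalScaleLocal(A,scale)); // column-scales both the diagonal
                                                       // and off-diagonal local blocks
*/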