1 2 /* 3 Support for the parallel BAIJ matrix vector multiply 4 */ 5 #include <../src/mat/impls/baij/mpi/mpibaij.h> 6 #include <petsc/private/isimpl.h> /* needed because accesses data structure of ISLocalToGlobalMapping directly */ 7 8 PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat) 9 { 10 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 11 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(baij->B->data); 12 PetscInt i,j,*aj = B->j,ec = 0,*garray; 13 PetscInt bs = mat->rmap->bs,*stmp; 14 IS from,to; 15 Vec gvec; 16 #if defined(PETSC_USE_CTABLE) 17 PetscTable gid1_lid1; 18 PetscTablePosition tpos; 19 PetscInt gid,lid; 20 #else 21 PetscInt Nbs = baij->Nbs,*indices; 22 #endif 23 24 PetscFunctionBegin; 25 #if defined(PETSC_USE_CTABLE) 26 /* use a table - Mark Adams */ 27 CHKERRQ(PetscTableCreate(B->mbs,baij->Nbs+1,&gid1_lid1)); 28 for (i=0; i<B->mbs; i++) { 29 for (j=0; j<B->ilen[i]; j++) { 30 PetscInt data,gid1 = aj[B->i[i]+j] + 1; 31 CHKERRQ(PetscTableFind(gid1_lid1,gid1,&data)); 32 if (!data) { 33 /* one based table */ 34 CHKERRQ(PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES)); 35 } 36 } 37 } 38 /* form array of columns we need */ 39 CHKERRQ(PetscMalloc1(ec,&garray)); 40 CHKERRQ(PetscTableGetHeadPosition(gid1_lid1,&tpos)); 41 while (tpos) { 42 CHKERRQ(PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid)); 43 gid--; lid--; 44 garray[lid] = gid; 45 } 46 CHKERRQ(PetscSortInt(ec,garray)); 47 CHKERRQ(PetscTableRemoveAll(gid1_lid1)); 48 for (i=0; i<ec; i++) { 49 CHKERRQ(PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES)); 50 } 51 /* compact out the extra columns in B */ 52 for (i=0; i<B->mbs; i++) { 53 for (j=0; j<B->ilen[i]; j++) { 54 PetscInt gid1 = aj[B->i[i] + j] + 1; 55 CHKERRQ(PetscTableFind(gid1_lid1,gid1,&lid)); 56 lid--; 57 aj[B->i[i]+j] = lid; 58 } 59 } 60 B->nbs = ec; 61 CHKERRQ(PetscLayoutDestroy(&baij->B->cmap)); 62 CHKERRQ(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)baij->B),ec*mat->rmap->bs,ec*mat->rmap->bs,mat->rmap->bs,&baij->B->cmap)); 63 CHKERRQ(PetscTableDestroy(&gid1_lid1)); 64 #else 65 /* Make an array as long as the number of columns */ 66 /* mark those columns that are in baij->B */ 67 CHKERRQ(PetscCalloc1(Nbs,&indices)); 68 for (i=0; i<B->mbs; i++) { 69 for (j=0; j<B->ilen[i]; j++) { 70 if (!indices[aj[B->i[i] + j]]) ec++; 71 indices[aj[B->i[i] + j]] = 1; 72 } 73 } 74 75 /* form array of columns we need */ 76 CHKERRQ(PetscMalloc1(ec,&garray)); 77 ec = 0; 78 for (i=0; i<Nbs; i++) { 79 if (indices[i]) { 80 garray[ec++] = i; 81 } 82 } 83 84 /* make indices now point into garray */ 85 for (i=0; i<ec; i++) { 86 indices[garray[i]] = i; 87 } 88 89 /* compact out the extra columns in B */ 90 for (i=0; i<B->mbs; i++) { 91 for (j=0; j<B->ilen[i]; j++) { 92 aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 93 } 94 } 95 B->nbs = ec; 96 CHKERRQ(PetscLayoutDestroy(&baij->B->cmap)); 97 CHKERRQ(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)baij->B),ec*mat->rmap->bs,ec*mat->rmap->bs,mat->rmap->bs,&baij->B->cmap)); 98 CHKERRQ(PetscFree(indices)); 99 #endif 100 101 /* create local vector that is used to scatter into */ 102 CHKERRQ(VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec)); 103 104 /* create two temporary index sets for building scatter-gather */ 105 CHKERRQ(ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from)); 106 107 CHKERRQ(PetscMalloc1(ec,&stmp)); 108 for (i=0; i<ec; i++) stmp[i] = i; 109 CHKERRQ(ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_OWN_POINTER,&to)); 110 111 /* create temporary global vector to generate scatter context */ 112 CHKERRQ(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec)); 113 114 CHKERRQ(VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx)); 115 CHKERRQ(VecScatterViewFromOptions(baij->Mvctx,(PetscObject)mat,"-matmult_vecscatter_view")); 116 117 CHKERRQ(PetscLogObjectParent((PetscObject)mat,(PetscObject)baij->Mvctx)); 118 CHKERRQ(PetscLogObjectParent((PetscObject)mat,(PetscObject)baij->lvec)); 119 CHKERRQ(PetscLogObjectParent((PetscObject)mat,(PetscObject)from)); 120 CHKERRQ(PetscLogObjectParent((PetscObject)mat,(PetscObject)to)); 121 122 baij->garray = garray; 123 124 CHKERRQ(PetscLogObjectMemory((PetscObject)mat,ec*sizeof(PetscInt))); 125 CHKERRQ(ISDestroy(&from)); 126 CHKERRQ(ISDestroy(&to)); 127 CHKERRQ(VecDestroy(&gvec)); 128 PetscFunctionReturn(0); 129 } 130 131 /* 132 Takes the local part of an already assembled MPIBAIJ matrix 133 and disassembles it. This is to allow new nonzeros into the matrix 134 that require more communication in the matrix vector multiply. 135 Thus certain data-structures must be rebuilt. 136 137 Kind of slow! But that's what application programmers get when 138 they are sloppy. 139 */ 140 PetscErrorCode MatDisAssemble_MPIBAIJ(Mat A) 141 { 142 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 143 Mat B = baij->B,Bnew; 144 Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 145 PetscInt i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray; 146 PetscInt bs2 = baij->bs2,*nz,ec,m = A->rmap->n; 147 MatScalar *a = Bbaij->a; 148 MatScalar *atmp; 149 150 PetscFunctionBegin; 151 /* free stuff related to matrix-vec multiply */ 152 CHKERRQ(VecGetSize(baij->lvec,&ec)); /* needed for PetscLogObjectMemory below */ 153 CHKERRQ(VecDestroy(&baij->lvec)); baij->lvec = NULL; 154 CHKERRQ(VecScatterDestroy(&baij->Mvctx)); baij->Mvctx = NULL; 155 if (baij->colmap) { 156 #if defined(PETSC_USE_CTABLE) 157 CHKERRQ(PetscTableDestroy(&baij->colmap)); 158 #else 159 CHKERRQ(PetscFree(baij->colmap)); 160 CHKERRQ(PetscLogObjectMemory((PetscObject)A,-Bbaij->nbs*sizeof(PetscInt))); 161 #endif 162 } 163 164 /* make sure that B is assembled so we can access its values */ 165 CHKERRQ(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 166 CHKERRQ(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 167 168 /* invent new B and copy stuff over */ 169 CHKERRQ(PetscMalloc1(mbs,&nz)); 170 for (i=0; i<mbs; i++) { 171 nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 172 } 173 CHKERRQ(MatCreate(PetscObjectComm((PetscObject)B),&Bnew)); 174 CHKERRQ(MatSetSizes(Bnew,m,n,m,n)); 175 CHKERRQ(MatSetType(Bnew,((PetscObject)B)->type_name)); 176 CHKERRQ(MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz)); 177 if (Bbaij->nonew >= 0) { /* Inherit insertion error options (if positive). */ 178 ((Mat_SeqBAIJ*)Bnew->data)->nonew = Bbaij->nonew; 179 } 180 181 CHKERRQ(MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_FALSE)); 182 /* 183 Ensure that B's nonzerostate is monotonically increasing. 184 Or should this follow the MatSetValuesBlocked() loop to preserve B's nonzerstate across a MatDisAssemble() call? 185 */ 186 Bnew->nonzerostate = B->nonzerostate; 187 188 for (i=0; i<mbs; i++) { 189 for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) { 190 col = garray[Bbaij->j[j]]; 191 atmp = a + j*bs2; 192 CHKERRQ(MatSetValuesBlocked_SeqBAIJ(Bnew,1,&i,1,&col,atmp,B->insertmode)); 193 } 194 } 195 CHKERRQ(MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_TRUE)); 196 197 CHKERRQ(PetscFree(nz)); 198 CHKERRQ(PetscFree(baij->garray)); 199 CHKERRQ(PetscLogObjectMemory((PetscObject)A,-ec*sizeof(PetscInt))); 200 CHKERRQ(MatDestroy(&B)); 201 CHKERRQ(PetscLogObjectParent((PetscObject)A,(PetscObject)Bnew)); 202 203 baij->B = Bnew; 204 A->was_assembled = PETSC_FALSE; 205 A->assembled = PETSC_FALSE; 206 PetscFunctionReturn(0); 207 } 208 209 /* ugly stuff added for Glenn someday we should fix this up */ 210 211 static PetscInt *uglyrmapd = NULL,*uglyrmapo = NULL; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */ 212 static Vec uglydd = NULL,uglyoo = NULL; /* work vectors used to scale the two parts of the local matrix */ 213 214 PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale) 215 { 216 Mat_MPIBAIJ *ina = (Mat_MPIBAIJ*) inA->data; /*access private part of matrix */ 217 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)ina->B->data; 218 PetscInt bs = inA->rmap->bs,i,n,nt,j,cstart,cend,no,*garray = ina->garray,*lindices; 219 PetscInt *r_rmapd,*r_rmapo; 220 221 PetscFunctionBegin; 222 CHKERRQ(MatGetOwnershipRange(inA,&cstart,&cend)); 223 CHKERRQ(MatGetSize(ina->A,NULL,&n)); 224 CHKERRQ(PetscCalloc1(inA->rmap->mapping->n+1,&r_rmapd)); 225 nt = 0; 226 for (i=0; i<inA->rmap->mapping->n; i++) { 227 if (inA->rmap->mapping->indices[i]*bs >= cstart && inA->rmap->mapping->indices[i]*bs < cend) { 228 nt++; 229 r_rmapd[i] = inA->rmap->mapping->indices[i] + 1; 230 } 231 } 232 PetscCheckFalse(nt*bs != n,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt*bs %" PetscInt_FMT " n %" PetscInt_FMT,nt*bs,n); 233 CHKERRQ(PetscMalloc1(n+1,&uglyrmapd)); 234 for (i=0; i<inA->rmap->mapping->n; i++) { 235 if (r_rmapd[i]) { 236 for (j=0; j<bs; j++) { 237 uglyrmapd[(r_rmapd[i]-1)*bs+j-cstart] = i*bs + j; 238 } 239 } 240 } 241 CHKERRQ(PetscFree(r_rmapd)); 242 CHKERRQ(VecCreateSeq(PETSC_COMM_SELF,n,&uglydd)); 243 244 CHKERRQ(PetscCalloc1(ina->Nbs+1,&lindices)); 245 for (i=0; i<B->nbs; i++) { 246 lindices[garray[i]] = i+1; 247 } 248 no = inA->rmap->mapping->n - nt; 249 CHKERRQ(PetscCalloc1(inA->rmap->mapping->n+1,&r_rmapo)); 250 nt = 0; 251 for (i=0; i<inA->rmap->mapping->n; i++) { 252 if (lindices[inA->rmap->mapping->indices[i]]) { 253 nt++; 254 r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]]; 255 } 256 } 257 PetscCheckFalse(nt > no,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %" PetscInt_FMT " no %" PetscInt_FMT,nt,n); 258 CHKERRQ(PetscFree(lindices)); 259 CHKERRQ(PetscMalloc1(nt*bs+1,&uglyrmapo)); 260 for (i=0; i<inA->rmap->mapping->n; i++) { 261 if (r_rmapo[i]) { 262 for (j=0; j<bs; j++) { 263 uglyrmapo[(r_rmapo[i]-1)*bs+j] = i*bs + j; 264 } 265 } 266 } 267 CHKERRQ(PetscFree(r_rmapo)); 268 CHKERRQ(VecCreateSeq(PETSC_COMM_SELF,nt*bs,&uglyoo)); 269 PetscFunctionReturn(0); 270 } 271 272 PetscErrorCode MatMPIBAIJDiagonalScaleLocal(Mat A,Vec scale) 273 { 274 /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */ 275 276 PetscFunctionBegin; 277 CHKERRQ(PetscTryMethod(A,"MatDiagonalScaleLocal_C",(Mat,Vec),(A,scale))); 278 PetscFunctionReturn(0); 279 } 280 281 PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat A,Vec scale) 282 { 283 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; /*access private part of matrix */ 284 PetscInt n,i; 285 PetscScalar *d,*o; 286 const PetscScalar *s; 287 288 PetscFunctionBegin; 289 if (!uglyrmapd) { 290 CHKERRQ(MatMPIBAIJDiagonalScaleLocalSetUp(A,scale)); 291 } 292 293 CHKERRQ(VecGetArrayRead(scale,&s)); 294 295 CHKERRQ(VecGetLocalSize(uglydd,&n)); 296 CHKERRQ(VecGetArray(uglydd,&d)); 297 for (i=0; i<n; i++) { 298 d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ 299 } 300 CHKERRQ(VecRestoreArray(uglydd,&d)); 301 /* column scale "diagonal" portion of local matrix */ 302 CHKERRQ(MatDiagonalScale(a->A,NULL,uglydd)); 303 304 CHKERRQ(VecGetLocalSize(uglyoo,&n)); 305 CHKERRQ(VecGetArray(uglyoo,&o)); 306 for (i=0; i<n; i++) { 307 o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ 308 } 309 CHKERRQ(VecRestoreArrayRead(scale,&s)); 310 CHKERRQ(VecRestoreArray(uglyoo,&o)); 311 /* column scale "off-diagonal" portion of local matrix */ 312 CHKERRQ(MatDiagonalScale(a->B,NULL,uglyoo)); 313 PetscFunctionReturn(0); 314 } 315