1 2 /* 3 Support for the parallel BAIJ matrix vector multiply 4 */ 5 #include <../src/mat/impls/baij/mpi/mpibaij.h> 6 #include <petsc/private/isimpl.h> /* needed because accesses data structure of ISLocalToGlobalMapping directly */ 7 8 PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat) 9 { 10 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 11 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(baij->B->data); 12 PetscErrorCode ierr; 13 PetscInt i,j,*aj = B->j,ec = 0,*garray; 14 PetscInt bs = mat->rmap->bs,*stmp; 15 IS from,to; 16 Vec gvec; 17 #if defined(PETSC_USE_CTABLE) 18 PetscTable gid1_lid1; 19 PetscTablePosition tpos; 20 PetscInt gid,lid; 21 #else 22 PetscInt Nbs = baij->Nbs,*indices; 23 #endif 24 25 PetscFunctionBegin; 26 #if defined(PETSC_USE_CTABLE) 27 /* use a table - Mark Adams */ 28 ierr = PetscTableCreate(B->mbs,baij->Nbs+1,&gid1_lid1);CHKERRQ(ierr); 29 for (i=0; i<B->mbs; i++) { 30 for (j=0; j<B->ilen[i]; j++) { 31 PetscInt data,gid1 = aj[B->i[i]+j] + 1; 32 ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr); 33 if (!data) { 34 /* one based table */ 35 ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr); 36 } 37 } 38 } 39 /* form array of columns we need */ 40 ierr = PetscMalloc1(ec+1,&garray);CHKERRQ(ierr); 41 ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 42 while (tpos) { 43 ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 44 gid--; lid--; 45 garray[lid] = gid; 46 } 47 ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); 48 ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 49 for (i=0; i<ec; i++) { 50 ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 51 } 52 /* compact out the extra columns in B */ 53 for (i=0; i<B->mbs; i++) { 54 for (j=0; j<B->ilen[i]; j++) { 55 PetscInt gid1 = aj[B->i[i] + j] + 1; 56 ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 57 lid--; 58 aj[B->i[i]+j] = lid; 59 } 60 } 61 B->nbs = ec; 62 baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs; 63 64 ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr); 65 ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr); 66 #else 67 /* Make an array as long as the number of columns */ 68 /* mark those columns that are in baij->B */ 69 ierr = PetscCalloc1(Nbs+1,&indices);CHKERRQ(ierr); 70 for (i=0; i<B->mbs; i++) { 71 for (j=0; j<B->ilen[i]; j++) { 72 if (!indices[aj[B->i[i] + j]]) ec++; 73 indices[aj[B->i[i] + j]] = 1; 74 } 75 } 76 77 /* form array of columns we need */ 78 ierr = PetscMalloc1(ec+1,&garray);CHKERRQ(ierr); 79 ec = 0; 80 for (i=0; i<Nbs; i++) { 81 if (indices[i]) { 82 garray[ec++] = i; 83 } 84 } 85 86 /* make indices now point into garray */ 87 for (i=0; i<ec; i++) { 88 indices[garray[i]] = i; 89 } 90 91 /* compact out the extra columns in B */ 92 for (i=0; i<B->mbs; i++) { 93 for (j=0; j<B->ilen[i]; j++) { 94 aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 95 } 96 } 97 B->nbs = ec; 98 baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs; 99 100 ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr); 101 ierr = PetscFree(indices);CHKERRQ(ierr); 102 #endif 103 104 /* create local vector that is used to scatter into */ 105 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr); 106 107 /* create two temporary index sets for building scatter-gather */ 108 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 109 110 ierr = PetscMalloc1(ec+1,&stmp);CHKERRQ(ierr); 111 for (i=0; i<ec; i++) stmp[i] = i; 112 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_OWN_POINTER,&to);CHKERRQ(ierr); 113 114 /* create temporary global vector to generate scatter context */ 115 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);CHKERRQ(ierr); 116 117 ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr); 118 119 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)baij->Mvctx);CHKERRQ(ierr); 120 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)baij->lvec);CHKERRQ(ierr); 121 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr); 122 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr); 123 124 baij->garray = garray; 125 126 ierr = PetscLogObjectMemory((PetscObject)mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr); 127 ierr = ISDestroy(&from);CHKERRQ(ierr); 128 ierr = ISDestroy(&to);CHKERRQ(ierr); 129 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 130 PetscFunctionReturn(0); 131 } 132 133 /* 134 Takes the local part of an already assembled MPIBAIJ matrix 135 and disassembles it. This is to allow new nonzeros into the matrix 136 that require more communication in the matrix vector multiply. 137 Thus certain data-structures must be rebuilt. 138 139 Kind of slow! But that's what application programmers get when 140 they are sloppy. 141 */ 142 PetscErrorCode MatDisAssemble_MPIBAIJ(Mat A) 143 { 144 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 145 Mat B = baij->B,Bnew; 146 Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 147 PetscErrorCode ierr; 148 PetscInt i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray; 149 PetscInt bs2 = baij->bs2,*nz,ec,m = A->rmap->n; 150 MatScalar *a = Bbaij->a; 151 MatScalar *atmp; 152 153 154 PetscFunctionBegin; 155 /* free stuff related to matrix-vec multiply */ 156 ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 157 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); baij->lvec = 0; 158 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0; 159 if (baij->colmap) { 160 #if defined(PETSC_USE_CTABLE) 161 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 162 #else 163 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 164 ierr = PetscLogObjectMemory((PetscObject)A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr); 165 #endif 166 } 167 168 /* make sure that B is assembled so we can access its values */ 169 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 170 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 171 172 /* invent new B and copy stuff over */ 173 ierr = PetscMalloc1(mbs,&nz);CHKERRQ(ierr); 174 for (i=0; i<mbs; i++) { 175 nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 176 } 177 ierr = MatCreate(PetscObjectComm((PetscObject)B),&Bnew);CHKERRQ(ierr); 178 ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr); 179 ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr); 180 ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr); 181 if (Bbaij->nonew >= 0) { /* Inherit insertion error options (if positive). */ 182 ((Mat_SeqBAIJ*)Bnew->data)->nonew = Bbaij->nonew; 183 } 184 185 ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 186 /* 187 Ensure that B's nonzerostate is monotonically increasing. 188 Or should this follow the MatSetValuesBlocked() loop to preserve B's nonzerstate across a MatDisAssemble() call? 189 */ 190 Bnew->nonzerostate = B->nonzerostate; 191 192 for (i=0; i<mbs; i++) { 193 for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) { 194 col = garray[Bbaij->j[j]]; 195 atmp = a + j*bs2; 196 ierr = MatSetValuesBlocked_SeqBAIJ(Bnew,1,&i,1,&col,atmp,B->insertmode);CHKERRQ(ierr); 197 } 198 } 199 ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 200 201 ierr = PetscFree(nz);CHKERRQ(ierr); 202 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 203 ierr = PetscLogObjectMemory((PetscObject)A,-ec*sizeof(PetscInt));CHKERRQ(ierr); 204 ierr = MatDestroy(&B);CHKERRQ(ierr); 205 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)Bnew);CHKERRQ(ierr); 206 207 baij->B = Bnew; 208 A->was_assembled = PETSC_FALSE; 209 A->assembled = PETSC_FALSE; 210 PetscFunctionReturn(0); 211 } 212 213 /* ugly stuff added for Glenn someday we should fix this up */ 214 215 static PetscInt *uglyrmapd = 0,*uglyrmapo = 0; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */ 216 static Vec uglydd = 0,uglyoo = 0; /* work vectors used to scale the two parts of the local matrix */ 217 218 219 PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale) 220 { 221 Mat_MPIBAIJ *ina = (Mat_MPIBAIJ*) inA->data; /*access private part of matrix */ 222 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)ina->B->data; 223 PetscErrorCode ierr; 224 PetscInt bs = inA->rmap->bs,i,n,nt,j,cstart,cend,no,*garray = ina->garray,*lindices; 225 PetscInt *r_rmapd,*r_rmapo; 226 227 PetscFunctionBegin; 228 ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr); 229 ierr = MatGetSize(ina->A,NULL,&n);CHKERRQ(ierr); 230 ierr = PetscCalloc1(inA->rmap->mapping->n+1,&r_rmapd);CHKERRQ(ierr); 231 nt = 0; 232 for (i=0; i<inA->rmap->mapping->n; i++) { 233 if (inA->rmap->mapping->indices[i]*bs >= cstart && inA->rmap->mapping->indices[i]*bs < cend) { 234 nt++; 235 r_rmapd[i] = inA->rmap->mapping->indices[i] + 1; 236 } 237 } 238 if (nt*bs != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt*bs %D n %D",nt*bs,n); 239 ierr = PetscMalloc1(n+1,&uglyrmapd);CHKERRQ(ierr); 240 for (i=0; i<inA->rmap->mapping->n; i++) { 241 if (r_rmapd[i]) { 242 for (j=0; j<bs; j++) { 243 uglyrmapd[(r_rmapd[i]-1)*bs+j-cstart] = i*bs + j; 244 } 245 } 246 } 247 ierr = PetscFree(r_rmapd);CHKERRQ(ierr); 248 ierr = VecCreateSeq(PETSC_COMM_SELF,n,&uglydd);CHKERRQ(ierr); 249 250 ierr = PetscCalloc1(ina->Nbs+1,&lindices);CHKERRQ(ierr); 251 for (i=0; i<B->nbs; i++) { 252 lindices[garray[i]] = i+1; 253 } 254 no = inA->rmap->mapping->n - nt; 255 ierr = PetscCalloc1(inA->rmap->mapping->n+1,&r_rmapo);CHKERRQ(ierr); 256 nt = 0; 257 for (i=0; i<inA->rmap->mapping->n; i++) { 258 if (lindices[inA->rmap->mapping->indices[i]]) { 259 nt++; 260 r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]]; 261 } 262 } 263 if (nt > no) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,n); 264 ierr = PetscFree(lindices);CHKERRQ(ierr); 265 ierr = PetscMalloc1(nt*bs+1,&uglyrmapo);CHKERRQ(ierr); 266 for (i=0; i<inA->rmap->mapping->n; i++) { 267 if (r_rmapo[i]) { 268 for (j=0; j<bs; j++) { 269 uglyrmapo[(r_rmapo[i]-1)*bs+j] = i*bs + j; 270 } 271 } 272 } 273 ierr = PetscFree(r_rmapo);CHKERRQ(ierr); 274 ierr = VecCreateSeq(PETSC_COMM_SELF,nt*bs,&uglyoo);CHKERRQ(ierr); 275 PetscFunctionReturn(0); 276 } 277 278 PetscErrorCode MatMPIBAIJDiagonalScaleLocal(Mat A,Vec scale) 279 { 280 /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */ 281 PetscErrorCode ierr; 282 283 PetscFunctionBegin; 284 ierr = PetscTryMethod(A,"MatDiagonalScaleLocal_C",(Mat,Vec),(A,scale));CHKERRQ(ierr); 285 PetscFunctionReturn(0); 286 } 287 288 PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat A,Vec scale) 289 { 290 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; /*access private part of matrix */ 291 PetscErrorCode ierr; 292 PetscInt n,i; 293 PetscScalar *d,*o; 294 const PetscScalar *s; 295 296 PetscFunctionBegin; 297 if (!uglyrmapd) { 298 ierr = MatMPIBAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr); 299 } 300 301 ierr = VecGetArrayRead(scale,&s);CHKERRQ(ierr); 302 303 ierr = VecGetLocalSize(uglydd,&n);CHKERRQ(ierr); 304 ierr = VecGetArray(uglydd,&d);CHKERRQ(ierr); 305 for (i=0; i<n; i++) { 306 d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ 307 } 308 ierr = VecRestoreArray(uglydd,&d);CHKERRQ(ierr); 309 /* column scale "diagonal" portion of local matrix */ 310 ierr = MatDiagonalScale(a->A,NULL,uglydd);CHKERRQ(ierr); 311 312 ierr = VecGetLocalSize(uglyoo,&n);CHKERRQ(ierr); 313 ierr = VecGetArray(uglyoo,&o);CHKERRQ(ierr); 314 for (i=0; i<n; i++) { 315 o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ 316 } 317 ierr = VecRestoreArrayRead(scale,&s);CHKERRQ(ierr); 318 ierr = VecRestoreArray(uglyoo,&o);CHKERRQ(ierr); 319 /* column scale "off-diagonal" portion of local matrix */ 320 ierr = MatDiagonalScale(a->B,NULL,uglyoo);CHKERRQ(ierr); 321 PetscFunctionReturn(0); 322 } 323