1 2 /* 3 Support for the parallel BAIJ matrix vector multiply 4 */ 5 #include <../src/mat/impls/baij/mpi/mpibaij.h> 6 #include <petsc/private/isimpl.h> /* needed because accesses data structure of ISLocalToGlobalMapping directly */ 7 8 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 9 10 PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat) 11 { 12 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 13 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(baij->B->data); 14 PetscErrorCode ierr; 15 PetscInt i,j,*aj = B->j,ec = 0,*garray; 16 PetscInt bs = mat->rmap->bs,*stmp; 17 IS from,to; 18 Vec gvec; 19 #if defined(PETSC_USE_CTABLE) 20 PetscTable gid1_lid1; 21 PetscTablePosition tpos; 22 PetscInt gid,lid; 23 #else 24 PetscInt Nbs = baij->Nbs,*indices; 25 #endif 26 27 PetscFunctionBegin; 28 #if defined(PETSC_USE_CTABLE) 29 /* use a table - Mark Adams */ 30 ierr = PetscTableCreate(B->mbs,baij->Nbs+1,&gid1_lid1);CHKERRQ(ierr); 31 for (i=0; i<B->mbs; i++) { 32 for (j=0; j<B->ilen[i]; j++) { 33 PetscInt data,gid1 = aj[B->i[i]+j] + 1; 34 ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr); 35 if (!data) { 36 /* one based table */ 37 ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr); 38 } 39 } 40 } 41 /* form array of columns we need */ 42 ierr = PetscMalloc1(ec+1,&garray);CHKERRQ(ierr); 43 ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 44 while (tpos) { 45 ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 46 gid--; lid--; 47 garray[lid] = gid; 48 } 49 ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); 50 ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 51 for (i=0; i<ec; i++) { 52 ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 53 } 54 /* compact out the extra columns in B */ 55 for (i=0; i<B->mbs; i++) { 56 for (j=0; j<B->ilen[i]; j++) { 57 PetscInt gid1 = aj[B->i[i] + j] + 1; 58 ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 59 lid--; 60 aj[B->i[i]+j] = lid; 61 } 62 } 63 B->nbs = ec; 64 baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs; 65 66 ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr); 67 ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr); 68 #else 69 /* Make an array as long as the number of columns */ 70 /* mark those columns that are in baij->B */ 71 ierr = PetscCalloc1(Nbs+1,&indices);CHKERRQ(ierr); 72 for (i=0; i<B->mbs; i++) { 73 for (j=0; j<B->ilen[i]; j++) { 74 if (!indices[aj[B->i[i] + j]]) ec++; 75 indices[aj[B->i[i] + j]] = 1; 76 } 77 } 78 79 /* form array of columns we need */ 80 ierr = PetscMalloc1(ec+1,&garray);CHKERRQ(ierr); 81 ec = 0; 82 for (i=0; i<Nbs; i++) { 83 if (indices[i]) { 84 garray[ec++] = i; 85 } 86 } 87 88 /* make indices now point into garray */ 89 for (i=0; i<ec; i++) { 90 indices[garray[i]] = i; 91 } 92 93 /* compact out the extra columns in B */ 94 for (i=0; i<B->mbs; i++) { 95 for (j=0; j<B->ilen[i]; j++) { 96 aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 97 } 98 } 99 B->nbs = ec; 100 baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs; 101 102 ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr); 103 ierr = PetscFree(indices);CHKERRQ(ierr); 104 #endif 105 106 /* create local vector that is used to scatter into */ 107 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr); 108 109 /* create two temporary index sets for building scatter-gather */ 110 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 111 112 ierr = PetscMalloc1(ec+1,&stmp);CHKERRQ(ierr); 113 for (i=0; i<ec; i++) stmp[i] = i; 114 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_OWN_POINTER,&to);CHKERRQ(ierr); 115 116 /* create temporary global vector to generate scatter context */ 117 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);CHKERRQ(ierr); 118 119 ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr); 120 121 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)baij->Mvctx);CHKERRQ(ierr); 122 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)baij->lvec);CHKERRQ(ierr); 123 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr); 124 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr); 125 126 baij->garray = garray; 127 128 ierr = PetscLogObjectMemory((PetscObject)mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr); 129 ierr = ISDestroy(&from);CHKERRQ(ierr); 130 ierr = ISDestroy(&to);CHKERRQ(ierr); 131 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 132 PetscFunctionReturn(0); 133 } 134 135 /* 136 Takes the local part of an already assembled MPIBAIJ matrix 137 and disassembles it. This is to allow new nonzeros into the matrix 138 that require more communication in the matrix vector multiply. 139 Thus certain data-structures must be rebuilt. 140 141 Kind of slow! But that's what application programmers get when 142 they are sloppy. 143 */ 144 PetscErrorCode MatDisAssemble_MPIBAIJ(Mat A) 145 { 146 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 147 Mat B = baij->B,Bnew; 148 Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 149 PetscErrorCode ierr; 150 PetscInt i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray; 151 PetscInt bs2 = baij->bs2,*nz,ec,m = A->rmap->n; 152 MatScalar *a = Bbaij->a; 153 MatScalar *atmp; 154 155 156 PetscFunctionBegin; 157 /* free stuff related to matrix-vec multiply */ 158 ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 159 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); baij->lvec = 0; 160 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0; 161 if (baij->colmap) { 162 #if defined(PETSC_USE_CTABLE) 163 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 164 #else 165 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 166 ierr = PetscLogObjectMemory((PetscObject)A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr); 167 #endif 168 } 169 170 /* make sure that B is assembled so we can access its values */ 171 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 172 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 173 174 /* invent new B and copy stuff over */ 175 ierr = PetscMalloc1(mbs,&nz);CHKERRQ(ierr); 176 for (i=0; i<mbs; i++) { 177 nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 178 } 179 ierr = MatCreate(PetscObjectComm((PetscObject)B),&Bnew);CHKERRQ(ierr); 180 ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr); 181 ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr); 182 ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr); 183 184 ((Mat_SeqBAIJ*)Bnew->data)->nonew = Bbaij->nonew; /* Inherit insertion error options. */ 185 186 ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 187 /* 188 Ensure that B's nonzerostate is monotonically increasing. 189 Or should this follow the MatSetValuesBlocked() loop to preserve B's nonzerstate across a MatDisAssemble() call? 190 */ 191 Bnew->nonzerostate = B->nonzerostate; 192 193 for (i=0; i<mbs; i++) { 194 for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) { 195 col = garray[Bbaij->j[j]]; 196 atmp = a + j*bs2; 197 ierr = MatSetValuesBlocked_SeqBAIJ(Bnew,1,&i,1,&col,atmp,B->insertmode);CHKERRQ(ierr); 198 } 199 } 200 ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 201 202 ierr = PetscFree(nz);CHKERRQ(ierr); 203 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 204 ierr = PetscLogObjectMemory((PetscObject)A,-ec*sizeof(PetscInt));CHKERRQ(ierr); 205 ierr = MatDestroy(&B);CHKERRQ(ierr); 206 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)Bnew);CHKERRQ(ierr); 207 208 baij->B = Bnew; 209 A->was_assembled = PETSC_FALSE; 210 A->assembled = PETSC_FALSE; 211 PetscFunctionReturn(0); 212 } 213 214 /* ugly stuff added for Glenn someday we should fix this up */ 215 216 static PetscInt *uglyrmapd = 0,*uglyrmapo = 0; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */ 217 static Vec uglydd = 0,uglyoo = 0; /* work vectors used to scale the two parts of the local matrix */ 218 219 220 PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale) 221 { 222 Mat_MPIBAIJ *ina = (Mat_MPIBAIJ*) inA->data; /*access private part of matrix */ 223 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)ina->B->data; 224 PetscErrorCode ierr; 225 PetscInt bs = inA->rmap->bs,i,n,nt,j,cstart,cend,no,*garray = ina->garray,*lindices; 226 PetscInt *r_rmapd,*r_rmapo; 227 228 PetscFunctionBegin; 229 ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr); 230 ierr = MatGetSize(ina->A,NULL,&n);CHKERRQ(ierr); 231 ierr = PetscCalloc1(inA->rmap->mapping->n+1,&r_rmapd);CHKERRQ(ierr); 232 nt = 0; 233 for (i=0; i<inA->rmap->mapping->n; i++) { 234 if (inA->rmap->mapping->indices[i]*bs >= cstart && inA->rmap->mapping->indices[i]*bs < cend) { 235 nt++; 236 r_rmapd[i] = inA->rmap->mapping->indices[i] + 1; 237 } 238 } 239 if (nt*bs != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt*bs %D n %D",nt*bs,n); 240 ierr = PetscMalloc1(n+1,&uglyrmapd);CHKERRQ(ierr); 241 for (i=0; i<inA->rmap->mapping->n; i++) { 242 if (r_rmapd[i]) { 243 for (j=0; j<bs; j++) { 244 uglyrmapd[(r_rmapd[i]-1)*bs+j-cstart] = i*bs + j; 245 } 246 } 247 } 248 ierr = PetscFree(r_rmapd);CHKERRQ(ierr); 249 ierr = VecCreateSeq(PETSC_COMM_SELF,n,&uglydd);CHKERRQ(ierr); 250 251 ierr = PetscCalloc1(ina->Nbs+1,&lindices);CHKERRQ(ierr); 252 for (i=0; i<B->nbs; i++) { 253 lindices[garray[i]] = i+1; 254 } 255 no = inA->rmap->mapping->n - nt; 256 ierr = PetscCalloc1(inA->rmap->mapping->n+1,&r_rmapo);CHKERRQ(ierr); 257 nt = 0; 258 for (i=0; i<inA->rmap->mapping->n; i++) { 259 if (lindices[inA->rmap->mapping->indices[i]]) { 260 nt++; 261 r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]]; 262 } 263 } 264 if (nt > no) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,n); 265 ierr = PetscFree(lindices);CHKERRQ(ierr); 266 ierr = PetscMalloc1(nt*bs+1,&uglyrmapo);CHKERRQ(ierr); 267 for (i=0; i<inA->rmap->mapping->n; i++) { 268 if (r_rmapo[i]) { 269 for (j=0; j<bs; j++) { 270 uglyrmapo[(r_rmapo[i]-1)*bs+j] = i*bs + j; 271 } 272 } 273 } 274 ierr = PetscFree(r_rmapo);CHKERRQ(ierr); 275 ierr = VecCreateSeq(PETSC_COMM_SELF,nt*bs,&uglyoo);CHKERRQ(ierr); 276 PetscFunctionReturn(0); 277 } 278 279 PetscErrorCode MatMPIBAIJDiagonalScaleLocal(Mat A,Vec scale) 280 { 281 /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */ 282 PetscErrorCode ierr; 283 284 PetscFunctionBegin; 285 ierr = PetscTryMethod(A,"MatDiagonalScaleLocal_C",(Mat,Vec),(A,scale));CHKERRQ(ierr); 286 PetscFunctionReturn(0); 287 } 288 289 PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat A,Vec scale) 290 { 291 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; /*access private part of matrix */ 292 PetscErrorCode ierr; 293 PetscInt n,i; 294 PetscScalar *d,*o,*s; 295 296 PetscFunctionBegin; 297 if (!uglyrmapd) { 298 ierr = MatMPIBAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr); 299 } 300 301 ierr = VecGetArray(scale,&s);CHKERRQ(ierr); 302 303 ierr = VecGetLocalSize(uglydd,&n);CHKERRQ(ierr); 304 ierr = VecGetArray(uglydd,&d);CHKERRQ(ierr); 305 for (i=0; i<n; i++) { 306 d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ 307 } 308 ierr = VecRestoreArray(uglydd,&d);CHKERRQ(ierr); 309 /* column scale "diagonal" portion of local matrix */ 310 ierr = MatDiagonalScale(a->A,NULL,uglydd);CHKERRQ(ierr); 311 312 ierr = VecGetLocalSize(uglyoo,&n);CHKERRQ(ierr); 313 ierr = VecGetArray(uglyoo,&o);CHKERRQ(ierr); 314 for (i=0; i<n; i++) { 315 o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ 316 } 317 ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr); 318 ierr = VecRestoreArray(uglyoo,&o);CHKERRQ(ierr); 319 /* column scale "off-diagonal" portion of local matrix */ 320 ierr = MatDiagonalScale(a->B,NULL,uglyoo);CHKERRQ(ierr); 321 PetscFunctionReturn(0); 322 } 323