1 2 /* 3 Support for the parallel BAIJ matrix vector multiply 4 */ 5 #include <../src/mat/impls/baij/mpi/mpibaij.h> 6 #include <petsc-private/isimpl.h> /* needed because accesses data structure of ISLocalToGlobalMapping directly */ 7 8 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 9 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatSetUpMultiply_MPIBAIJ" 12 PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat) 13 { 14 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 15 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(baij->B->data); 16 PetscErrorCode ierr; 17 PetscInt i,j,*aj = B->j,ec = 0,*garray; 18 PetscInt bs = mat->rmap->bs,*stmp; 19 IS from,to; 20 Vec gvec; 21 #if defined(PETSC_USE_CTABLE) 22 PetscTable gid1_lid1; 23 PetscTablePosition tpos; 24 PetscInt gid,lid; 25 #else 26 PetscInt Nbs = baij->Nbs,*indices; 27 #endif 28 29 PetscFunctionBegin; 30 #if defined(PETSC_USE_CTABLE) 31 /* use a table - Mark Adams */ 32 ierr = PetscTableCreate(B->mbs,baij->Nbs+1,&gid1_lid1);CHKERRQ(ierr); 33 for (i=0; i<B->mbs; i++) { 34 for (j=0; j<B->ilen[i]; j++) { 35 PetscInt data,gid1 = aj[B->i[i]+j] + 1; 36 ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr); 37 if (!data) { 38 /* one based table */ 39 ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr); 40 } 41 } 42 } 43 /* form array of columns we need */ 44 ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 45 ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 46 while (tpos) { 47 ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 48 gid--; lid--; 49 garray[lid] = gid; 50 } 51 ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); 52 ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 53 for (i=0; i<ec; i++) { 54 ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 55 } 56 /* compact out the extra columns in B */ 57 for (i=0; i<B->mbs; i++) { 58 for (j=0; j<B->ilen[i]; j++) { 59 PetscInt gid1 = aj[B->i[i] + j] + 1; 60 ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 61 lid--; 62 aj[B->i[i]+j] = lid; 63 } 64 } 65 B->nbs = ec; 66 baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs; 67 68 ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr); 69 ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr); 70 #else 71 /* Make an array as long as the number of columns */ 72 /* mark those columns that are in baij->B */ 73 ierr = PetscMalloc((Nbs+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr); 74 ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr); 75 for (i=0; i<B->mbs; i++) { 76 for (j=0; j<B->ilen[i]; j++) { 77 if (!indices[aj[B->i[i] + j]]) ec++; 78 indices[aj[B->i[i] + j]] = 1; 79 } 80 } 81 82 /* form array of columns we need */ 83 ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 84 ec = 0; 85 for (i=0; i<Nbs; i++) { 86 if (indices[i]) { 87 garray[ec++] = i; 88 } 89 } 90 91 /* make indices now point into garray */ 92 for (i=0; i<ec; i++) { 93 indices[garray[i]] = i; 94 } 95 96 /* compact out the extra columns in B */ 97 for (i=0; i<B->mbs; i++) { 98 for (j=0; j<B->ilen[i]; j++) { 99 aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 100 } 101 } 102 B->nbs = ec; 103 baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs; 104 105 ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr); 106 ierr = PetscFree(indices);CHKERRQ(ierr); 107 #endif 108 109 /* create local vector that is used to scatter into */ 110 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr); 111 112 /* create two temporary index sets for building scatter-gather */ 113 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 114 115 ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&stmp);CHKERRQ(ierr); 116 for (i=0; i<ec; i++) stmp[i] = i; 117 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_OWN_POINTER,&to);CHKERRQ(ierr); 118 119 /* create temporary global vector to generate scatter context */ 120 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);CHKERRQ(ierr); 121 122 ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr); 123 124 ierr = PetscLogObjectParent(mat,baij->Mvctx);CHKERRQ(ierr); 125 ierr = PetscLogObjectParent(mat,baij->lvec);CHKERRQ(ierr); 126 ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr); 127 ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr); 128 129 baij->garray = garray; 130 131 ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr); 132 ierr = ISDestroy(&from);CHKERRQ(ierr); 133 ierr = ISDestroy(&to);CHKERRQ(ierr); 134 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 135 PetscFunctionReturn(0); 136 } 137 138 /* 139 Takes the local part of an already assembled MPIBAIJ matrix 140 and disassembles it. This is to allow new nonzeros into the matrix 141 that require more communication in the matrix vector multiply. 142 Thus certain data-structures must be rebuilt. 143 144 Kind of slow! But that's what application programmers get when 145 they are sloppy. 146 */ 147 #undef __FUNCT__ 148 #define __FUNCT__ "MatDisAssemble_MPIBAIJ" 149 PetscErrorCode MatDisAssemble_MPIBAIJ(Mat A) 150 { 151 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 152 Mat B = baij->B,Bnew; 153 Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 154 PetscErrorCode ierr; 155 PetscInt i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray; 156 PetscInt bs2 = baij->bs2,*nz,ec,m = A->rmap->n; 157 MatScalar *a = Bbaij->a; 158 MatScalar *atmp; 159 160 161 PetscFunctionBegin; 162 /* free stuff related to matrix-vec multiply */ 163 ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 164 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); baij->lvec = 0; 165 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0; 166 if (baij->colmap) { 167 #if defined(PETSC_USE_CTABLE) 168 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 169 #else 170 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 171 ierr = PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr); 172 #endif 173 } 174 175 /* make sure that B is assembled so we can access its values */ 176 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 177 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 178 179 /* invent new B and copy stuff over */ 180 ierr = PetscMalloc(mbs*sizeof(PetscInt),&nz);CHKERRQ(ierr); 181 for (i=0; i<mbs; i++) { 182 nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 183 } 184 ierr = MatCreate(PetscObjectComm((PetscObject)B),&Bnew);CHKERRQ(ierr); 185 ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr); 186 ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr); 187 ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr); 188 189 ((Mat_SeqBAIJ*)Bnew->data)->nonew = Bbaij->nonew; /* Inherit insertion error options. */ 190 191 ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 192 193 for (i=0; i<mbs; i++) { 194 for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) { 195 col = garray[Bbaij->j[j]]; 196 atmp = a + j*bs2; 197 ierr = MatSetValuesBlocked_SeqBAIJ(Bnew,1,&i,1,&col,atmp,B->insertmode);CHKERRQ(ierr); 198 } 199 } 200 ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 201 202 ierr = PetscFree(nz);CHKERRQ(ierr); 203 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 204 ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr); 205 ierr = MatDestroy(&B);CHKERRQ(ierr); 206 ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr); 207 208 baij->B = Bnew; 209 A->was_assembled = PETSC_FALSE; 210 A->assembled = PETSC_FALSE; 211 PetscFunctionReturn(0); 212 } 213 214 /* ugly stuff added for Glenn someday we should fix this up */ 215 216 static PetscInt *uglyrmapd = 0,*uglyrmapo = 0; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */ 217 static Vec uglydd = 0,uglyoo = 0; /* work vectors used to scale the two parts of the local matrix */ 218 219 220 #undef __FUNCT__ 221 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocalSetUp" 222 PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale) 223 { 224 Mat_MPIBAIJ *ina = (Mat_MPIBAIJ*) inA->data; /*access private part of matrix */ 225 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)ina->B->data; 226 PetscErrorCode ierr; 227 PetscInt bs = inA->rmap->bs,i,n,nt,j,cstart,cend,no,*garray = ina->garray,*lindices; 228 PetscInt *r_rmapd,*r_rmapo; 229 230 PetscFunctionBegin; 231 ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr); 232 ierr = MatGetSize(ina->A,NULL,&n);CHKERRQ(ierr); 233 ierr = PetscMalloc((inA->rmap->bmapping->n+1)*sizeof(PetscInt),&r_rmapd);CHKERRQ(ierr); 234 ierr = PetscMemzero(r_rmapd,inA->rmap->bmapping->n*sizeof(PetscInt));CHKERRQ(ierr); 235 nt = 0; 236 for (i=0; i<inA->rmap->bmapping->n; i++) { 237 if (inA->rmap->bmapping->indices[i]*bs >= cstart && inA->rmap->bmapping->indices[i]*bs < cend) { 238 nt++; 239 r_rmapd[i] = inA->rmap->bmapping->indices[i] + 1; 240 } 241 } 242 if (nt*bs != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt*bs %D n %D",nt*bs,n); 243 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&uglyrmapd);CHKERRQ(ierr); 244 for (i=0; i<inA->rmap->bmapping->n; i++) { 245 if (r_rmapd[i]) { 246 for (j=0; j<bs; j++) { 247 uglyrmapd[(r_rmapd[i]-1)*bs+j-cstart] = i*bs + j; 248 } 249 } 250 } 251 ierr = PetscFree(r_rmapd);CHKERRQ(ierr); 252 ierr = VecCreateSeq(PETSC_COMM_SELF,n,&uglydd);CHKERRQ(ierr); 253 254 ierr = PetscMalloc((ina->Nbs+1)*sizeof(PetscInt),&lindices);CHKERRQ(ierr); 255 ierr = PetscMemzero(lindices,ina->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 256 for (i=0; i<B->nbs; i++) { 257 lindices[garray[i]] = i+1; 258 } 259 no = inA->rmap->bmapping->n - nt; 260 ierr = PetscMalloc((inA->rmap->bmapping->n+1)*sizeof(PetscInt),&r_rmapo);CHKERRQ(ierr); 261 ierr = PetscMemzero(r_rmapo,inA->rmap->bmapping->n*sizeof(PetscInt));CHKERRQ(ierr); 262 nt = 0; 263 for (i=0; i<inA->rmap->bmapping->n; i++) { 264 if (lindices[inA->rmap->bmapping->indices[i]]) { 265 nt++; 266 r_rmapo[i] = lindices[inA->rmap->bmapping->indices[i]]; 267 } 268 } 269 if (nt > no) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,n); 270 ierr = PetscFree(lindices);CHKERRQ(ierr); 271 ierr = PetscMalloc((nt*bs+1)*sizeof(PetscInt),&uglyrmapo);CHKERRQ(ierr); 272 for (i=0; i<inA->rmap->bmapping->n; i++) { 273 if (r_rmapo[i]) { 274 for (j=0; j<bs; j++) { 275 uglyrmapo[(r_rmapo[i]-1)*bs+j] = i*bs + j; 276 } 277 } 278 } 279 ierr = PetscFree(r_rmapo);CHKERRQ(ierr); 280 ierr = VecCreateSeq(PETSC_COMM_SELF,nt*bs,&uglyoo);CHKERRQ(ierr); 281 PetscFunctionReturn(0); 282 } 283 284 #undef __FUNCT__ 285 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocal" 286 PetscErrorCode MatMPIBAIJDiagonalScaleLocal(Mat A,Vec scale) 287 { 288 /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */ 289 PetscErrorCode ierr; 290 291 PetscFunctionBegin; 292 ierr = PetscTryMethod(A,"MatDiagonalScaleLocal_C",(Mat,Vec),(A,scale));CHKERRQ(ierr); 293 PetscFunctionReturn(0); 294 } 295 296 EXTERN_C_BEGIN 297 #undef __FUNCT__ 298 #define __FUNCT__ "MatDiagonalScaleLocal_MPIBAIJ" 299 PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat A,Vec scale) 300 { 301 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; /*access private part of matrix */ 302 PetscErrorCode ierr; 303 PetscInt n,i; 304 PetscScalar *d,*o,*s; 305 306 PetscFunctionBegin; 307 if (!uglyrmapd) { 308 ierr = MatMPIBAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr); 309 } 310 311 ierr = VecGetArray(scale,&s);CHKERRQ(ierr); 312 313 ierr = VecGetLocalSize(uglydd,&n);CHKERRQ(ierr); 314 ierr = VecGetArray(uglydd,&d);CHKERRQ(ierr); 315 for (i=0; i<n; i++) { 316 d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ 317 } 318 ierr = VecRestoreArray(uglydd,&d);CHKERRQ(ierr); 319 /* column scale "diagonal" portion of local matrix */ 320 ierr = MatDiagonalScale(a->A,NULL,uglydd);CHKERRQ(ierr); 321 322 ierr = VecGetLocalSize(uglyoo,&n);CHKERRQ(ierr); 323 ierr = VecGetArray(uglyoo,&o);CHKERRQ(ierr); 324 for (i=0; i<n; i++) { 325 o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ 326 } 327 ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr); 328 ierr = VecRestoreArray(uglyoo,&o);CHKERRQ(ierr); 329 /* column scale "off-diagonal" portion of local matrix */ 330 ierr = MatDiagonalScale(a->B,NULL,uglyoo);CHKERRQ(ierr); 331 PetscFunctionReturn(0); 332 } 333 EXTERN_C_END 334 335 336