1 2 /* 3 Support for the parallel BAIJ matrix vector multiply 4 */ 5 #include <../src/mat/impls/baij/mpi/mpibaij.h> 6 7 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatSetUpMultiply_MPIBAIJ" 11 PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat) 12 { 13 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 14 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(baij->B->data); 15 PetscErrorCode ierr; 16 PetscInt i,j,*aj = B->j,ec = 0,*garray; 17 PetscInt bs = mat->rmap->bs,*stmp; 18 IS from,to; 19 Vec gvec; 20 #if defined(PETSC_USE_CTABLE) 21 PetscTable gid1_lid1; 22 PetscTablePosition tpos; 23 PetscInt gid,lid; 24 #else 25 PetscInt Nbs = baij->Nbs,*indices; 26 #endif 27 28 PetscFunctionBegin; 29 #if defined(PETSC_USE_CTABLE) 30 /* use a table - Mark Adams */ 31 ierr = PetscTableCreate(B->mbs,baij->Nbs+1,&gid1_lid1);CHKERRQ(ierr); 32 for (i=0; i<B->mbs; i++) { 33 for (j=0; j<B->ilen[i]; j++) { 34 PetscInt data,gid1 = aj[B->i[i]+j] + 1; 35 ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr); 36 if (!data) { 37 /* one based table */ 38 ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr); 39 } 40 } 41 } 42 /* form array of columns we need */ 43 ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 44 ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 45 while (tpos) { 46 ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 47 gid--; lid--; 48 garray[lid] = gid; 49 } 50 ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); 51 ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 52 for (i=0; i<ec; i++) { 53 ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 54 } 55 /* compact out the extra columns in B */ 56 for (i=0; i<B->mbs; i++) { 57 for (j=0; j<B->ilen[i]; j++) { 58 PetscInt gid1 = aj[B->i[i] + j] + 1; 59 ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 60 lid--; 61 aj[B->i[i]+j] = lid; 62 } 63 } 64 B->nbs = ec; 65 baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs; 66 67 ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr); 68 ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr); 69 #else 70 /* Make an array as long as the number of columns */ 71 /* mark those columns that are in baij->B */ 72 ierr = PetscMalloc((Nbs+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr); 73 ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr); 74 for (i=0; i<B->mbs; i++) { 75 for (j=0; j<B->ilen[i]; j++) { 76 if (!indices[aj[B->i[i] + j]]) ec++; 77 indices[aj[B->i[i] + j]] = 1; 78 } 79 } 80 81 /* form array of columns we need */ 82 ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 83 ec = 0; 84 for (i=0; i<Nbs; i++) { 85 if (indices[i]) { 86 garray[ec++] = i; 87 } 88 } 89 90 /* make indices now point into garray */ 91 for (i=0; i<ec; i++) { 92 indices[garray[i]] = i; 93 } 94 95 /* compact out the extra columns in B */ 96 for (i=0; i<B->mbs; i++) { 97 for (j=0; j<B->ilen[i]; j++) { 98 aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 99 } 100 } 101 B->nbs = ec; 102 baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs; 103 104 ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr); 105 ierr = PetscFree(indices);CHKERRQ(ierr); 106 #endif 107 108 /* create local vector that is used to scatter into */ 109 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr); 110 111 /* create two temporary index sets for building scatter-gather */ 112 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 113 114 ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&stmp);CHKERRQ(ierr); 115 for (i=0; i<ec; i++) stmp[i] = i; 116 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_OWN_POINTER,&to);CHKERRQ(ierr); 117 118 /* create temporary global vector to generate scatter context */ 119 ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,1,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr); 120 121 ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr); 122 123 ierr = PetscLogObjectParent(mat,baij->Mvctx);CHKERRQ(ierr); 124 ierr = PetscLogObjectParent(mat,baij->lvec);CHKERRQ(ierr); 125 ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr); 126 ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr); 127 128 baij->garray = garray; 129 130 ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr); 131 ierr = ISDestroy(&from);CHKERRQ(ierr); 132 ierr = ISDestroy(&to);CHKERRQ(ierr); 133 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 134 PetscFunctionReturn(0); 135 } 136 137 /* 138 Takes the local part of an already assembled MPIBAIJ matrix 139 and disassembles it. This is to allow new nonzeros into the matrix 140 that require more communication in the matrix vector multiply. 141 Thus certain data-structures must be rebuilt. 142 143 Kind of slow! But that's what application programmers get when 144 they are sloppy. 145 */ 146 #undef __FUNCT__ 147 #define __FUNCT__ "MatDisAssemble_MPIBAIJ" 148 PetscErrorCode MatDisAssemble_MPIBAIJ(Mat A) 149 { 150 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 151 Mat B = baij->B,Bnew; 152 Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 153 PetscErrorCode ierr; 154 PetscInt i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray; 155 PetscInt bs2 = baij->bs2,*nz,ec,m = A->rmap->n; 156 MatScalar *a = Bbaij->a; 157 MatScalar *atmp; 158 159 160 PetscFunctionBegin; 161 /* free stuff related to matrix-vec multiply */ 162 ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 163 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); baij->lvec = 0; 164 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0; 165 if (baij->colmap) { 166 #if defined(PETSC_USE_CTABLE) 167 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 168 #else 169 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 170 ierr = PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr); 171 #endif 172 } 173 174 /* make sure that B is assembled so we can access its values */ 175 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 176 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 177 178 /* invent new B and copy stuff over */ 179 ierr = PetscMalloc(mbs*sizeof(PetscInt),&nz);CHKERRQ(ierr); 180 for (i=0; i<mbs; i++) { 181 nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 182 } 183 ierr = MatCreate(((PetscObject)B)->comm,&Bnew);CHKERRQ(ierr); 184 ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr); 185 ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr); 186 ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr); 187 188 ((Mat_SeqBAIJ*)Bnew->data)->nonew = Bbaij->nonew; /* Inherit insertion error options. */ 189 190 ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 191 192 for (i=0; i<mbs; i++) { 193 for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) { 194 col = garray[Bbaij->j[j]]; 195 atmp = a + j*bs2; 196 ierr = MatSetValuesBlocked_SeqBAIJ(Bnew,1,&i,1,&col,atmp,B->insertmode);CHKERRQ(ierr); 197 } 198 } 199 ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 200 201 ierr = PetscFree(nz);CHKERRQ(ierr); 202 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 203 ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr); 204 ierr = MatDestroy(&B);CHKERRQ(ierr); 205 ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr); 206 207 baij->B = Bnew; 208 A->was_assembled = PETSC_FALSE; 209 A->assembled = PETSC_FALSE; 210 PetscFunctionReturn(0); 211 } 212 213 /* ugly stuff added for Glenn someday we should fix this up */ 214 215 static PetscInt *uglyrmapd = 0,*uglyrmapo = 0; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */ 216 static Vec uglydd = 0,uglyoo = 0; /* work vectors used to scale the two parts of the local matrix */ 217 218 219 #undef __FUNCT__ 220 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocalSetUp" 221 PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale) 222 { 223 Mat_MPIBAIJ *ina = (Mat_MPIBAIJ*) inA->data; /*access private part of matrix */ 224 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)ina->B->data; 225 PetscErrorCode ierr; 226 PetscInt bs = inA->rmap->bs,i,n,nt,j,cstart,cend,no,*garray = ina->garray,*lindices; 227 PetscInt *r_rmapd,*r_rmapo; 228 229 PetscFunctionBegin; 230 ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr); 231 ierr = MatGetSize(ina->A,PETSC_NULL,&n);CHKERRQ(ierr); 232 ierr = PetscMalloc((inA->rmap->bmapping->n+1)*sizeof(PetscInt),&r_rmapd);CHKERRQ(ierr); 233 ierr = PetscMemzero(r_rmapd,inA->rmap->bmapping->n*sizeof(PetscInt));CHKERRQ(ierr); 234 nt = 0; 235 for (i=0; i<inA->rmap->bmapping->n; i++) { 236 if (inA->rmap->bmapping->indices[i]*bs >= cstart && inA->rmap->bmapping->indices[i]*bs < cend) { 237 nt++; 238 r_rmapd[i] = inA->rmap->bmapping->indices[i] + 1; 239 } 240 } 241 if (nt*bs != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt*bs %D n %D",nt*bs,n); 242 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&uglyrmapd);CHKERRQ(ierr); 243 for (i=0; i<inA->rmap->bmapping->n; i++) { 244 if (r_rmapd[i]) { 245 for (j=0; j<bs; j++) { 246 uglyrmapd[(r_rmapd[i]-1)*bs+j-cstart] = i*bs + j; 247 } 248 } 249 } 250 ierr = PetscFree(r_rmapd);CHKERRQ(ierr); 251 ierr = VecCreateSeq(PETSC_COMM_SELF,n,&uglydd);CHKERRQ(ierr); 252 253 ierr = PetscMalloc((ina->Nbs+1)*sizeof(PetscInt),&lindices);CHKERRQ(ierr); 254 ierr = PetscMemzero(lindices,ina->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 255 for (i=0; i<B->nbs; i++) { 256 lindices[garray[i]] = i+1; 257 } 258 no = inA->rmap->bmapping->n - nt; 259 ierr = PetscMalloc((inA->rmap->bmapping->n+1)*sizeof(PetscInt),&r_rmapo);CHKERRQ(ierr); 260 ierr = PetscMemzero(r_rmapo,inA->rmap->bmapping->n*sizeof(PetscInt));CHKERRQ(ierr); 261 nt = 0; 262 for (i=0; i<inA->rmap->bmapping->n; i++) { 263 if (lindices[inA->rmap->bmapping->indices[i]]) { 264 nt++; 265 r_rmapo[i] = lindices[inA->rmap->bmapping->indices[i]]; 266 } 267 } 268 if (nt > no) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,n); 269 ierr = PetscFree(lindices);CHKERRQ(ierr); 270 ierr = PetscMalloc((nt*bs+1)*sizeof(PetscInt),&uglyrmapo);CHKERRQ(ierr); 271 for (i=0; i<inA->rmap->bmapping->n; i++) { 272 if (r_rmapo[i]) { 273 for (j=0; j<bs; j++) { 274 uglyrmapo[(r_rmapo[i]-1)*bs+j] = i*bs + j; 275 } 276 } 277 } 278 ierr = PetscFree(r_rmapo);CHKERRQ(ierr); 279 ierr = VecCreateSeq(PETSC_COMM_SELF,nt*bs,&uglyoo);CHKERRQ(ierr); 280 PetscFunctionReturn(0); 281 } 282 283 #undef __FUNCT__ 284 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocal" 285 PetscErrorCode MatMPIBAIJDiagonalScaleLocal(Mat A,Vec scale) 286 { 287 /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */ 288 PetscErrorCode ierr; 289 290 PetscFunctionBegin; 291 ierr = PetscTryMethod(A,"MatDiagonalScaleLocal_C",(Mat,Vec),(A,scale));CHKERRQ(ierr); 292 PetscFunctionReturn(0); 293 } 294 295 EXTERN_C_BEGIN 296 #undef __FUNCT__ 297 #define __FUNCT__ "MatDiagonalScaleLocal_MPIBAIJ" 298 PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat A,Vec scale) 299 { 300 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; /*access private part of matrix */ 301 PetscErrorCode ierr; 302 PetscInt n,i; 303 PetscScalar *d,*o,*s; 304 305 PetscFunctionBegin; 306 if (!uglyrmapd) { 307 ierr = MatMPIBAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr); 308 } 309 310 ierr = VecGetArray(scale,&s);CHKERRQ(ierr); 311 312 ierr = VecGetLocalSize(uglydd,&n);CHKERRQ(ierr); 313 ierr = VecGetArray(uglydd,&d);CHKERRQ(ierr); 314 for (i=0; i<n; i++) { 315 d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ 316 } 317 ierr = VecRestoreArray(uglydd,&d);CHKERRQ(ierr); 318 /* column scale "diagonal" portion of local matrix */ 319 ierr = MatDiagonalScale(a->A,PETSC_NULL,uglydd);CHKERRQ(ierr); 320 321 ierr = VecGetLocalSize(uglyoo,&n);CHKERRQ(ierr); 322 ierr = VecGetArray(uglyoo,&o);CHKERRQ(ierr); 323 for (i=0; i<n; i++) { 324 o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ 325 } 326 ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr); 327 ierr = VecRestoreArray(uglyoo,&o);CHKERRQ(ierr); 328 /* column scale "off-diagonal" portion of local matrix */ 329 ierr = MatDiagonalScale(a->B,PETSC_NULL,uglyoo);CHKERRQ(ierr); 330 PetscFunctionReturn(0); 331 } 332 EXTERN_C_END 333 334 335