1 2 /* 3 Support for the parallel BAIJ matrix vector multiply 4 */ 5 #include <../src/mat/impls/baij/mpi/mpibaij.h> 6 7 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatSetUpMultiply_MPIBAIJ" 11 PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat) 12 { 13 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 14 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(baij->B->data); 15 PetscErrorCode ierr; 16 PetscInt i,j,*aj = B->j,ec = 0,*garray; 17 PetscInt bs = mat->rmap->bs,*stmp; 18 IS from,to; 19 Vec gvec; 20 #if defined (PETSC_USE_CTABLE) 21 PetscTable gid1_lid1; 22 PetscTablePosition tpos; 23 PetscInt gid,lid; 24 #else 25 PetscInt Nbs = baij->Nbs,*indices; 26 #endif 27 28 PetscFunctionBegin; 29 #if defined (PETSC_USE_CTABLE) 30 /* use a table - Mark Adams */ 31 ierr = PetscTableCreate(B->mbs,baij->Nbs+1,&gid1_lid1);CHKERRQ(ierr); 32 for (i=0; i<B->mbs; i++) { 33 for (j=0; j<B->ilen[i]; j++) { 34 PetscInt data,gid1 = aj[B->i[i]+j] + 1; 35 ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr); 36 if (!data) { 37 /* one based table */ 38 ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr); 39 } 40 } 41 } 42 /* form array of columns we need */ 43 ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 44 ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 45 while (tpos) { 46 ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 47 gid--; lid--; 48 garray[lid] = gid; 49 } 50 ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); 51 ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 52 for (i=0; i<ec; i++) { 53 ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 54 } 55 /* compact out the extra columns in B */ 56 for (i=0; i<B->mbs; i++) { 57 for (j=0; j<B->ilen[i]; j++) { 58 PetscInt gid1 = aj[B->i[i] + j] + 1; 59 ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 60 lid --; 61 aj[B->i[i]+j] = lid; 62 } 63 } 64 B->nbs = ec; 65 baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs; 66 ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr); 67 ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr); 68 #else 69 /* Make an array as long as the number of columns */ 70 /* mark those columns that are in baij->B */ 71 ierr = PetscMalloc((Nbs+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr); 72 ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr); 73 for (i=0; i<B->mbs; i++) { 74 for (j=0; j<B->ilen[i]; j++) { 75 if (!indices[aj[B->i[i] + j]]) ec++; 76 indices[aj[B->i[i] + j]] = 1; 77 } 78 } 79 80 /* form array of columns we need */ 81 ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 82 ec = 0; 83 for (i=0; i<Nbs; i++) { 84 if (indices[i]) { 85 garray[ec++] = i; 86 } 87 } 88 89 /* make indices now point into garray */ 90 for (i=0; i<ec; i++) { 91 indices[garray[i]] = i; 92 } 93 94 /* compact out the extra columns in B */ 95 for (i=0; i<B->mbs; i++) { 96 for (j=0; j<B->ilen[i]; j++) { 97 aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 98 } 99 } 100 B->nbs = ec; 101 baij->B->cmap->n =baij->B->cmap->N = ec*mat->rmap->bs; 102 ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr); 103 ierr = PetscFree(indices);CHKERRQ(ierr); 104 #endif 105 106 /* create local vector that is used to scatter into */ 107 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr); 108 109 /* create two temporary index sets for building scatter-gather */ 110 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 111 112 ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&stmp);CHKERRQ(ierr); 113 for (i=0; i<ec; i++) { stmp[i] = i; } 114 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_OWN_POINTER,&to);CHKERRQ(ierr); 115 116 /* create temporary global vector to generate scatter context */ 117 ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,1,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr); 118 119 ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr); 120 121 ierr = PetscLogObjectParent(mat,baij->Mvctx);CHKERRQ(ierr); 122 ierr = PetscLogObjectParent(mat,baij->lvec);CHKERRQ(ierr); 123 ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr); 124 ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr); 125 baij->garray = garray; 126 ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr); 127 ierr = ISDestroy(&from);CHKERRQ(ierr); 128 ierr = ISDestroy(&to);CHKERRQ(ierr); 129 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 130 PetscFunctionReturn(0); 131 } 132 133 /* 134 Takes the local part of an already assembled MPIBAIJ matrix 135 and disassembles it. This is to allow new nonzeros into the matrix 136 that require more communication in the matrix vector multiply. 137 Thus certain data-structures must be rebuilt. 138 139 Kind of slow! But that's what application programmers get when 140 they are sloppy. 141 */ 142 #undef __FUNCT__ 143 #define __FUNCT__ "MatDisAssemble_MPIBAIJ" 144 PetscErrorCode MatDisAssemble_MPIBAIJ(Mat A) 145 { 146 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 147 Mat B = baij->B,Bnew; 148 Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 149 PetscErrorCode ierr; 150 PetscInt i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray; 151 PetscInt bs2 = baij->bs2,*nz,ec,m = A->rmap->n; 152 MatScalar *a = Bbaij->a; 153 MatScalar *atmp; 154 155 156 PetscFunctionBegin; 157 /* free stuff related to matrix-vec multiply */ 158 ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 159 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); baij->lvec = 0; 160 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0; 161 if (baij->colmap) { 162 #if defined (PETSC_USE_CTABLE) 163 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 164 #else 165 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 166 ierr = PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr); 167 #endif 168 } 169 170 /* make sure that B is assembled so we can access its values */ 171 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 172 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 173 174 /* invent new B and copy stuff over */ 175 ierr = PetscMalloc(mbs*sizeof(PetscInt),&nz);CHKERRQ(ierr); 176 for (i=0; i<mbs; i++) { 177 nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 178 } 179 ierr = MatCreate(((PetscObject)B)->comm,&Bnew);CHKERRQ(ierr); 180 ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr); 181 ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr); 182 ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr); 183 ((Mat_SeqBAIJ*)Bnew->data)->nonew = Bbaij->nonew; /* Inherit insertion error options. */ 184 ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 185 186 for (i=0; i<mbs; i++) { 187 for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) { 188 col = garray[Bbaij->j[j]]; 189 atmp = a + j*bs2; 190 ierr = MatSetValuesBlocked_SeqBAIJ(Bnew,1,&i,1,&col,atmp,B->insertmode);CHKERRQ(ierr); 191 } 192 } 193 ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 194 195 ierr = PetscFree(nz);CHKERRQ(ierr); 196 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 197 ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr); 198 ierr = MatDestroy(&B);CHKERRQ(ierr); 199 ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr); 200 baij->B = Bnew; 201 A->was_assembled = PETSC_FALSE; 202 A->assembled = PETSC_FALSE; 203 PetscFunctionReturn(0); 204 } 205 206 /* ugly stuff added for Glenn someday we should fix this up */ 207 208 static PetscInt *uglyrmapd = 0,*uglyrmapo = 0; /* mapping from the local ordering to the "diagonal" and "off-diagonal" 209 parts of the local matrix */ 210 static Vec uglydd = 0,uglyoo = 0; /* work vectors used to scale the two parts of the local matrix */ 211 212 213 #undef __FUNCT__ 214 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocalSetUp" 215 PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale) 216 { 217 Mat_MPIBAIJ *ina = (Mat_MPIBAIJ*) inA->data; /*access private part of matrix */ 218 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)ina->B->data; 219 PetscErrorCode ierr; 220 PetscInt bs = inA->rmap->bs,i,n,nt,j,cstart,cend,no,*garray = ina->garray,*lindices; 221 PetscInt *r_rmapd,*r_rmapo; 222 223 PetscFunctionBegin; 224 ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr); 225 ierr = MatGetSize(ina->A,PETSC_NULL,&n);CHKERRQ(ierr); 226 ierr = PetscMalloc((inA->rmap->bmapping->n+1)*sizeof(PetscInt),&r_rmapd);CHKERRQ(ierr); 227 ierr = PetscMemzero(r_rmapd,inA->rmap->bmapping->n*sizeof(PetscInt));CHKERRQ(ierr); 228 nt = 0; 229 for (i=0; i<inA->rmap->bmapping->n; i++) { 230 if (inA->rmap->bmapping->indices[i]*bs >= cstart && inA->rmap->bmapping->indices[i]*bs < cend) { 231 nt++; 232 r_rmapd[i] = inA->rmap->bmapping->indices[i] + 1; 233 } 234 } 235 if (nt*bs != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt*bs %D n %D",nt*bs,n); 236 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&uglyrmapd);CHKERRQ(ierr); 237 for (i=0; i<inA->rmap->bmapping->n; i++) { 238 if (r_rmapd[i]) { 239 for (j=0; j<bs; j++) { 240 uglyrmapd[(r_rmapd[i]-1)*bs+j-cstart] = i*bs + j; 241 } 242 } 243 } 244 ierr = PetscFree(r_rmapd);CHKERRQ(ierr); 245 ierr = VecCreateSeq(PETSC_COMM_SELF,n,&uglydd);CHKERRQ(ierr); 246 247 ierr = PetscMalloc((ina->Nbs+1)*sizeof(PetscInt),&lindices);CHKERRQ(ierr); 248 ierr = PetscMemzero(lindices,ina->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 249 for (i=0; i<B->nbs; i++) { 250 lindices[garray[i]] = i+1; 251 } 252 no = inA->rmap->bmapping->n - nt; 253 ierr = PetscMalloc((inA->rmap->bmapping->n+1)*sizeof(PetscInt),&r_rmapo);CHKERRQ(ierr); 254 ierr = PetscMemzero(r_rmapo,inA->rmap->bmapping->n*sizeof(PetscInt));CHKERRQ(ierr); 255 nt = 0; 256 for (i=0; i<inA->rmap->bmapping->n; i++) { 257 if (lindices[inA->rmap->bmapping->indices[i]]) { 258 nt++; 259 r_rmapo[i] = lindices[inA->rmap->bmapping->indices[i]]; 260 } 261 } 262 if (nt > no) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,n); 263 ierr = PetscFree(lindices);CHKERRQ(ierr); 264 ierr = PetscMalloc((nt*bs+1)*sizeof(PetscInt),&uglyrmapo);CHKERRQ(ierr); 265 for (i=0; i<inA->rmap->bmapping->n; i++) { 266 if (r_rmapo[i]) { 267 for (j=0; j<bs; j++) { 268 uglyrmapo[(r_rmapo[i]-1)*bs+j] = i*bs + j; 269 } 270 } 271 } 272 ierr = PetscFree(r_rmapo);CHKERRQ(ierr); 273 ierr = VecCreateSeq(PETSC_COMM_SELF,nt*bs,&uglyoo);CHKERRQ(ierr); 274 275 PetscFunctionReturn(0); 276 } 277 278 #undef __FUNCT__ 279 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocal" 280 PetscErrorCode MatMPIBAIJDiagonalScaleLocal(Mat A,Vec scale) 281 { 282 /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */ 283 PetscErrorCode ierr; 284 285 PetscFunctionBegin; 286 ierr = PetscTryMethod(A,"MatDiagonalScaleLocal_C",(Mat,Vec),(A,scale));CHKERRQ(ierr); 287 PetscFunctionReturn(0); 288 } 289 290 EXTERN_C_BEGIN 291 #undef __FUNCT__ 292 #define __FUNCT__ "MatDiagonalScaleLocal_MPIBAIJ" 293 PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat A,Vec scale) 294 { 295 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; /*access private part of matrix */ 296 PetscErrorCode ierr; 297 PetscInt n,i; 298 PetscScalar *d,*o,*s; 299 300 PetscFunctionBegin; 301 if (!uglyrmapd) { 302 ierr = MatMPIBAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr); 303 } 304 305 ierr = VecGetArray(scale,&s);CHKERRQ(ierr); 306 307 ierr = VecGetLocalSize(uglydd,&n);CHKERRQ(ierr); 308 ierr = VecGetArray(uglydd,&d);CHKERRQ(ierr); 309 for (i=0; i<n; i++) { 310 d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ 311 } 312 ierr = VecRestoreArray(uglydd,&d);CHKERRQ(ierr); 313 /* column scale "diagonal" portion of local matrix */ 314 ierr = MatDiagonalScale(a->A,PETSC_NULL,uglydd);CHKERRQ(ierr); 315 316 ierr = VecGetLocalSize(uglyoo,&n);CHKERRQ(ierr); 317 ierr = VecGetArray(uglyoo,&o);CHKERRQ(ierr); 318 for (i=0; i<n; i++) { 319 o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ 320 } 321 ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr); 322 ierr = VecRestoreArray(uglyoo,&o);CHKERRQ(ierr); 323 /* column scale "off-diagonal" portion of local matrix */ 324 ierr = MatDiagonalScale(a->B,PETSC_NULL,uglyoo);CHKERRQ(ierr); 325 326 PetscFunctionReturn(0); 327 } 328 EXTERN_C_END 329 330 331