1 2 /* 3 Support for the parallel BAIJ matrix vector multiply 4 */ 5 #include <../src/mat/impls/baij/mpi/mpibaij.h> 6 7 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatSetUpMultiply_MPIBAIJ" 11 PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat) 12 { 13 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 14 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(baij->B->data); 15 PetscErrorCode ierr; 16 PetscInt i,j,*aj = B->j,ec = 0,*garray; 17 PetscInt bs = mat->rmap->bs,*stmp; 18 IS from,to; 19 Vec gvec; 20 #if defined (PETSC_USE_CTABLE) 21 PetscTable gid1_lid1; 22 PetscTablePosition tpos; 23 PetscInt gid,lid; 24 #else 25 PetscInt Nbs = baij->Nbs,*indices; 26 #endif 27 28 PetscFunctionBegin; 29 30 #if defined (PETSC_USE_CTABLE) 31 /* use a table - Mark Adams */ 32 ierr = PetscTableCreate(B->mbs,baij->Nbs+1,&gid1_lid1);CHKERRQ(ierr); 33 for (i=0; i<B->mbs; i++) { 34 for (j=0; j<B->ilen[i]; j++) { 35 PetscInt data,gid1 = aj[B->i[i]+j] + 1; 36 ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr); 37 if (!data) { 38 /* one based table */ 39 ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr); 40 } 41 } 42 } 43 /* form array of columns we need */ 44 ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 45 ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 46 while (tpos) { 47 ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 48 gid--; lid--; 49 garray[lid] = gid; 50 } 51 ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); 52 ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 53 for (i=0; i<ec; i++) { 54 ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 55 } 56 /* compact out the extra columns in B */ 57 for (i=0; i<B->mbs; i++) { 58 for (j=0; j<B->ilen[i]; j++) { 59 PetscInt gid1 = aj[B->i[i] + j] + 1; 60 ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 61 lid --; 62 aj[B->i[i]+j] = lid; 63 } 64 } 65 B->nbs = ec; 66 baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs; 67 ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr); 68 ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr); 69 #else 70 /* Make an array as long as the number of columns */ 71 /* mark those columns that are in baij->B */ 72 ierr = PetscMalloc((Nbs+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr); 73 ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr); 74 for (i=0; i<B->mbs; i++) { 75 for (j=0; j<B->ilen[i]; j++) { 76 if (!indices[aj[B->i[i] + j]]) ec++; 77 indices[aj[B->i[i] + j]] = 1; 78 } 79 } 80 81 /* form array of columns we need */ 82 ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 83 ec = 0; 84 for (i=0; i<Nbs; i++) { 85 if (indices[i]) { 86 garray[ec++] = i; 87 } 88 } 89 90 /* make indices now point into garray */ 91 for (i=0; i<ec; i++) { 92 indices[garray[i]] = i; 93 } 94 95 /* compact out the extra columns in B */ 96 for (i=0; i<B->mbs; i++) { 97 for (j=0; j<B->ilen[i]; j++) { 98 aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 99 } 100 } 101 B->nbs = ec; 102 baij->B->cmap->n =baij->B->cmap->N = ec*mat->rmap->bs; 103 ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr); 104 ierr = PetscFree(indices);CHKERRQ(ierr); 105 #endif 106 107 /* create local vector that is used to scatter into */ 108 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr); 109 110 /* create two temporary index sets for building scatter-gather */ 111 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 112 113 ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&stmp);CHKERRQ(ierr); 114 for (i=0; i<ec; i++) { stmp[i] = i; } 115 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_OWN_POINTER,&to);CHKERRQ(ierr); 116 117 /* create temporary global vector to generate scatter context */ 118 ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,1,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr); 119 120 ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr); 121 122 ierr = PetscLogObjectParent(mat,baij->Mvctx);CHKERRQ(ierr); 123 ierr = PetscLogObjectParent(mat,baij->lvec);CHKERRQ(ierr); 124 ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr); 125 ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr); 126 baij->garray = garray; 127 ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr); 128 ierr = ISDestroy(&from);CHKERRQ(ierr); 129 ierr = ISDestroy(&to);CHKERRQ(ierr); 130 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 131 PetscFunctionReturn(0); 132 } 133 134 /* 135 Takes the local part of an already assembled MPIBAIJ matrix 136 and disassembles it. This is to allow new nonzeros into the matrix 137 that require more communication in the matrix vector multiply. 138 Thus certain data-structures must be rebuilt. 139 140 Kind of slow! But that's what application programmers get when 141 they are sloppy. 142 */ 143 #undef __FUNCT__ 144 #define __FUNCT__ "MatDisAssemble_MPIBAIJ" 145 PetscErrorCode MatDisAssemble_MPIBAIJ(Mat A) 146 { 147 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 148 Mat B = baij->B,Bnew; 149 Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 150 PetscErrorCode ierr; 151 PetscInt i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray; 152 PetscInt bs2 = baij->bs2,*nz,ec,m = A->rmap->n; 153 MatScalar *a = Bbaij->a; 154 MatScalar *atmp; 155 156 157 PetscFunctionBegin; 158 /* free stuff related to matrix-vec multiply */ 159 ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 160 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); baij->lvec = 0; 161 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0; 162 if (baij->colmap) { 163 #if defined (PETSC_USE_CTABLE) 164 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 165 #else 166 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 167 ierr = PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr); 168 #endif 169 } 170 171 /* make sure that B is assembled so we can access its values */ 172 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 173 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 174 175 /* invent new B and copy stuff over */ 176 ierr = PetscMalloc(mbs*sizeof(PetscInt),&nz);CHKERRQ(ierr); 177 for (i=0; i<mbs; i++) { 178 nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 179 } 180 ierr = MatCreate(((PetscObject)B)->comm,&Bnew);CHKERRQ(ierr); 181 ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr); 182 ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr); 183 ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr); 184 ((Mat_SeqBAIJ*)Bnew->data)->nonew = Bbaij->nonew; /* Inherit insertion error options. */ 185 ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 186 187 for (i=0; i<mbs; i++) { 188 for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) { 189 col = garray[Bbaij->j[j]]; 190 atmp = a + j*bs2; 191 ierr = MatSetValuesBlocked_SeqBAIJ(Bnew,1,&i,1,&col,atmp,B->insertmode);CHKERRQ(ierr); 192 } 193 } 194 ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 195 196 ierr = PetscFree(nz);CHKERRQ(ierr); 197 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 198 ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr); 199 ierr = MatDestroy(&B);CHKERRQ(ierr); 200 ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr); 201 baij->B = Bnew; 202 A->was_assembled = PETSC_FALSE; 203 A->assembled = PETSC_FALSE; 204 PetscFunctionReturn(0); 205 } 206 207 /* ugly stuff added for Glenn someday we should fix this up */ 208 209 static PetscInt *uglyrmapd = 0,*uglyrmapo = 0; /* mapping from the local ordering to the "diagonal" and "off-diagonal" 210 parts of the local matrix */ 211 static Vec uglydd = 0,uglyoo = 0; /* work vectors used to scale the two parts of the local matrix */ 212 213 214 #undef __FUNCT__ 215 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocalSetUp" 216 PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale) 217 { 218 Mat_MPIBAIJ *ina = (Mat_MPIBAIJ*) inA->data; /*access private part of matrix */ 219 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)ina->B->data; 220 PetscErrorCode ierr; 221 PetscInt bs = inA->rmap->bs,i,n,nt,j,cstart,cend,no,*garray = ina->garray,*lindices; 222 PetscInt *r_rmapd,*r_rmapo; 223 224 PetscFunctionBegin; 225 ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr); 226 ierr = MatGetSize(ina->A,PETSC_NULL,&n);CHKERRQ(ierr); 227 ierr = PetscMalloc((inA->rmap->bmapping->n+1)*sizeof(PetscInt),&r_rmapd);CHKERRQ(ierr); 228 ierr = PetscMemzero(r_rmapd,inA->rmap->bmapping->n*sizeof(PetscInt));CHKERRQ(ierr); 229 nt = 0; 230 for (i=0; i<inA->rmap->bmapping->n; i++) { 231 if (inA->rmap->bmapping->indices[i]*bs >= cstart && inA->rmap->bmapping->indices[i]*bs < cend) { 232 nt++; 233 r_rmapd[i] = inA->rmap->bmapping->indices[i] + 1; 234 } 235 } 236 if (nt*bs != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt*bs %D n %D",nt*bs,n); 237 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&uglyrmapd);CHKERRQ(ierr); 238 for (i=0; i<inA->rmap->bmapping->n; i++) { 239 if (r_rmapd[i]){ 240 for (j=0; j<bs; j++) { 241 uglyrmapd[(r_rmapd[i]-1)*bs+j-cstart] = i*bs + j; 242 } 243 } 244 } 245 ierr = PetscFree(r_rmapd);CHKERRQ(ierr); 246 ierr = VecCreateSeq(PETSC_COMM_SELF,n,&uglydd);CHKERRQ(ierr); 247 248 ierr = PetscMalloc((ina->Nbs+1)*sizeof(PetscInt),&lindices);CHKERRQ(ierr); 249 ierr = PetscMemzero(lindices,ina->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 250 for (i=0; i<B->nbs; i++) { 251 lindices[garray[i]] = i+1; 252 } 253 no = inA->rmap->bmapping->n - nt; 254 ierr = PetscMalloc((inA->rmap->bmapping->n+1)*sizeof(PetscInt),&r_rmapo);CHKERRQ(ierr); 255 ierr = PetscMemzero(r_rmapo,inA->rmap->bmapping->n*sizeof(PetscInt));CHKERRQ(ierr); 256 nt = 0; 257 for (i=0; i<inA->rmap->bmapping->n; i++) { 258 if (lindices[inA->rmap->bmapping->indices[i]]) { 259 nt++; 260 r_rmapo[i] = lindices[inA->rmap->bmapping->indices[i]]; 261 } 262 } 263 if (nt > no) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,n); 264 ierr = PetscFree(lindices);CHKERRQ(ierr); 265 ierr = PetscMalloc((nt*bs+1)*sizeof(PetscInt),&uglyrmapo);CHKERRQ(ierr); 266 for (i=0; i<inA->rmap->bmapping->n; i++) { 267 if (r_rmapo[i]){ 268 for (j=0; j<bs; j++) { 269 uglyrmapo[(r_rmapo[i]-1)*bs+j] = i*bs + j; 270 } 271 } 272 } 273 ierr = PetscFree(r_rmapo);CHKERRQ(ierr); 274 ierr = VecCreateSeq(PETSC_COMM_SELF,nt*bs,&uglyoo);CHKERRQ(ierr); 275 276 PetscFunctionReturn(0); 277 } 278 279 #undef __FUNCT__ 280 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocal" 281 PetscErrorCode MatMPIBAIJDiagonalScaleLocal(Mat A,Vec scale) 282 { 283 /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */ 284 PetscErrorCode ierr; 285 286 PetscFunctionBegin; 287 ierr = PetscTryMethod(A,"MatDiagonalScaleLocal_C",(Mat,Vec),(A,scale));CHKERRQ(ierr); 288 PetscFunctionReturn(0); 289 } 290 291 EXTERN_C_BEGIN 292 #undef __FUNCT__ 293 #define __FUNCT__ "MatDiagonalScaleLocal_MPIBAIJ" 294 PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat A,Vec scale) 295 { 296 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; /*access private part of matrix */ 297 PetscErrorCode ierr; 298 PetscInt n,i; 299 PetscScalar *d,*o,*s; 300 301 PetscFunctionBegin; 302 if (!uglyrmapd) { 303 ierr = MatMPIBAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr); 304 } 305 306 ierr = VecGetArray(scale,&s);CHKERRQ(ierr); 307 308 ierr = VecGetLocalSize(uglydd,&n);CHKERRQ(ierr); 309 ierr = VecGetArray(uglydd,&d);CHKERRQ(ierr); 310 for (i=0; i<n; i++) { 311 d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ 312 } 313 ierr = VecRestoreArray(uglydd,&d);CHKERRQ(ierr); 314 /* column scale "diagonal" portion of local matrix */ 315 ierr = MatDiagonalScale(a->A,PETSC_NULL,uglydd);CHKERRQ(ierr); 316 317 ierr = VecGetLocalSize(uglyoo,&n);CHKERRQ(ierr); 318 ierr = VecGetArray(uglyoo,&o);CHKERRQ(ierr); 319 for (i=0; i<n; i++) { 320 o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ 321 } 322 ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr); 323 ierr = VecRestoreArray(uglyoo,&o);CHKERRQ(ierr); 324 /* column scale "off-diagonal" portion of local matrix */ 325 ierr = MatDiagonalScale(a->B,PETSC_NULL,uglyoo);CHKERRQ(ierr); 326 327 PetscFunctionReturn(0); 328 } 329 EXTERN_C_END 330 331 332