1 #define PETSCMAT_DLL 2 3 /* 4 Support for the parallel BAIJ matrix vector multiply 5 */ 6 #include "../src/mat/impls/baij/mpi/mpibaij.h" 7 8 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 9 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatSetUpMultiply_MPIBAIJ" 12 PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat) 13 { 14 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 15 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(baij->B->data); 16 PetscErrorCode ierr; 17 PetscInt i,j,*aj = B->j,ec = 0,*garray; 18 PetscInt bs = mat->rmap->bs,*stmp; 19 IS from,to; 20 Vec gvec; 21 #if defined (PETSC_USE_CTABLE) 22 PetscTable gid1_lid1; 23 PetscTablePosition tpos; 24 PetscInt gid,lid; 25 #else 26 PetscInt Nbs = baij->Nbs,*indices; 27 #endif 28 29 PetscFunctionBegin; 30 31 #if defined (PETSC_USE_CTABLE) 32 /* use a table - Mark Adams */ 33 ierr = PetscTableCreate(B->mbs,&gid1_lid1);CHKERRQ(ierr); 34 for (i=0; i<B->mbs; i++) { 35 for (j=0; j<B->ilen[i]; j++) { 36 PetscInt data,gid1 = aj[B->i[i]+j] + 1; 37 ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr); 38 if (!data) { 39 /* one based table */ 40 ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr); 41 } 42 } 43 } 44 /* form array of columns we need */ 45 ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 46 ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 47 while (tpos) { 48 ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 49 gid--; lid--; 50 garray[lid] = gid; 51 } 52 ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); 53 ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 54 for (i=0; i<ec; i++) { 55 ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr); 56 } 57 /* compact out the extra columns in B */ 58 for (i=0; i<B->mbs; i++) { 59 for (j=0; j<B->ilen[i]; j++) { 60 PetscInt gid1 = aj[B->i[i] + j] + 1; 61 ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 62 lid --; 63 aj[B->i[i]+j] = lid; 64 } 65 } 66 B->nbs = ec; 67 baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs; 68 ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr); 69 ierr = PetscTableDestroy(gid1_lid1);CHKERRQ(ierr); 70 /* Mark Adams */ 71 #else 72 /* Make an array as long as the number of columns */ 73 /* mark those columns that are in baij->B */ 74 ierr = PetscMalloc((Nbs+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr); 75 ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr); 76 for (i=0; i<B->mbs; i++) { 77 for (j=0; j<B->ilen[i]; j++) { 78 if (!indices[aj[B->i[i] + j]]) ec++; 79 indices[aj[B->i[i] + j]] = 1; 80 } 81 } 82 83 /* form array of columns we need */ 84 ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 85 ec = 0; 86 for (i=0; i<Nbs; i++) { 87 if (indices[i]) { 88 garray[ec++] = i; 89 } 90 } 91 92 /* make indices now point into garray */ 93 for (i=0; i<ec; i++) { 94 indices[garray[i]] = i; 95 } 96 97 /* compact out the extra columns in B */ 98 for (i=0; i<B->mbs; i++) { 99 for (j=0; j<B->ilen[i]; j++) { 100 aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 101 } 102 } 103 B->nbs = ec; 104 baij->B->cmap->n =baij->B->cmap->N = ec*mat->rmap->bs; 105 ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr); 106 ierr = PetscFree(indices);CHKERRQ(ierr); 107 #endif 108 109 /* create local vector that is used to scatter into */ 110 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr); 111 112 /* create two temporary index sets for building scatter-gather */ 113 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 114 115 ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&stmp);CHKERRQ(ierr); 116 for (i=0; i<ec; i++) { stmp[i] = i; } 117 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_OWN_POINTER,&to);CHKERRQ(ierr); 118 119 /* create temporary global vector to generate scatter context */ 120 ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr); 121 122 ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr); 123 124 ierr = PetscLogObjectParent(mat,baij->Mvctx);CHKERRQ(ierr); 125 ierr = PetscLogObjectParent(mat,baij->lvec);CHKERRQ(ierr); 126 ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr); 127 ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr); 128 baij->garray = garray; 129 ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr); 130 ierr = ISDestroy(from);CHKERRQ(ierr); 131 ierr = ISDestroy(to);CHKERRQ(ierr); 132 ierr = VecDestroy(gvec);CHKERRQ(ierr); 133 PetscFunctionReturn(0); 134 } 135 136 /* 137 Takes the local part of an already assembled MPIBAIJ matrix 138 and disassembles it. This is to allow new nonzeros into the matrix 139 that require more communication in the matrix vector multiply. 140 Thus certain data-structures must be rebuilt. 141 142 Kind of slow! But that's what application programmers get when 143 they are sloppy. 144 */ 145 #undef __FUNCT__ 146 #define __FUNCT__ "DisAssemble_MPIBAIJ" 147 PetscErrorCode DisAssemble_MPIBAIJ(Mat A) 148 { 149 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 150 Mat B = baij->B,Bnew; 151 Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 152 PetscErrorCode ierr; 153 PetscInt i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray; 154 PetscInt bs2 = baij->bs2,*nz,ec,m = A->rmap->n; 155 MatScalar *a = Bbaij->a; 156 MatScalar *atmp; 157 158 159 PetscFunctionBegin; 160 /* free stuff related to matrix-vec multiply */ 161 ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 162 ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0; 163 ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0; 164 if (baij->colmap) { 165 #if defined (PETSC_USE_CTABLE) 166 ierr = PetscTableDestroy(baij->colmap); baij->colmap = 0;CHKERRQ(ierr); 167 #else 168 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 169 ierr = PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr); 170 #endif 171 } 172 173 /* make sure that B is assembled so we can access its values */ 174 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 175 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 176 177 /* invent new B and copy stuff over */ 178 ierr = PetscMalloc(mbs*sizeof(PetscInt),&nz);CHKERRQ(ierr); 179 for (i=0; i<mbs; i++) { 180 nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 181 } 182 ierr = MatCreate(((PetscObject)B)->comm,&Bnew);CHKERRQ(ierr); 183 ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr); 184 ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr); 185 ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr); 186 ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 187 188 for (i=0; i<mbs; i++) { 189 for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) { 190 col = garray[Bbaij->j[j]]; 191 atmp = a + j*bs2; 192 ierr = MatSetValuesBlocked_SeqBAIJ(Bnew,1,&i,1,&col,atmp,B->insertmode);CHKERRQ(ierr); 193 } 194 } 195 ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 196 197 ierr = PetscFree(nz);CHKERRQ(ierr); 198 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 199 ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr); 200 ierr = MatDestroy(B);CHKERRQ(ierr); 201 ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr); 202 baij->B = Bnew; 203 A->was_assembled = PETSC_FALSE; 204 A->assembled = PETSC_FALSE; 205 PetscFunctionReturn(0); 206 } 207 208 /* ugly stuff added for Glenn someday we should fix this up */ 209 210 static PetscInt *uglyrmapd = 0,*uglyrmapo = 0; /* mapping from the local ordering to the "diagonal" and "off-diagonal" 211 parts of the local matrix */ 212 static Vec uglydd = 0,uglyoo = 0; /* work vectors used to scale the two parts of the local matrix */ 213 214 215 #undef __FUNCT__ 216 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocalSetUp" 217 PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale) 218 { 219 Mat_MPIBAIJ *ina = (Mat_MPIBAIJ*) inA->data; /*access private part of matrix */ 220 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)ina->B->data; 221 PetscErrorCode ierr; 222 PetscInt bs = inA->rmap->bs,i,n,nt,j,cstart,cend,no,*garray = ina->garray,*lindices; 223 PetscInt *r_rmapd,*r_rmapo; 224 225 PetscFunctionBegin; 226 ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr); 227 ierr = MatGetSize(ina->A,PETSC_NULL,&n);CHKERRQ(ierr); 228 ierr = PetscMalloc((inA->rbmapping->n+1)*sizeof(PetscInt),&r_rmapd);CHKERRQ(ierr); 229 ierr = PetscMemzero(r_rmapd,inA->rbmapping->n*sizeof(PetscInt));CHKERRQ(ierr); 230 nt = 0; 231 for (i=0; i<inA->rbmapping->n; i++) { 232 if (inA->rbmapping->indices[i]*bs >= cstart && inA->rbmapping->indices[i]*bs < cend) { 233 nt++; 234 r_rmapd[i] = inA->rbmapping->indices[i] + 1; 235 } 236 } 237 if (nt*bs != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt*bs %D n %D",nt*bs,n); 238 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&uglyrmapd);CHKERRQ(ierr); 239 for (i=0; i<inA->rbmapping->n; i++) { 240 if (r_rmapd[i]){ 241 for (j=0; j<bs; j++) { 242 uglyrmapd[(r_rmapd[i]-1)*bs+j-cstart] = i*bs + j; 243 } 244 } 245 } 246 ierr = PetscFree(r_rmapd);CHKERRQ(ierr); 247 ierr = VecCreateSeq(PETSC_COMM_SELF,n,&uglydd);CHKERRQ(ierr); 248 249 ierr = PetscMalloc((ina->Nbs+1)*sizeof(PetscInt),&lindices);CHKERRQ(ierr); 250 ierr = PetscMemzero(lindices,ina->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 251 for (i=0; i<B->nbs; i++) { 252 lindices[garray[i]] = i+1; 253 } 254 no = inA->rbmapping->n - nt; 255 ierr = PetscMalloc((inA->rbmapping->n+1)*sizeof(PetscInt),&r_rmapo);CHKERRQ(ierr); 256 ierr = PetscMemzero(r_rmapo,inA->rbmapping->n*sizeof(PetscInt));CHKERRQ(ierr); 257 nt = 0; 258 for (i=0; i<inA->rbmapping->n; i++) { 259 if (lindices[inA->rbmapping->indices[i]]) { 260 nt++; 261 r_rmapo[i] = lindices[inA->rbmapping->indices[i]]; 262 } 263 } 264 if (nt > no) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,n); 265 ierr = PetscFree(lindices);CHKERRQ(ierr); 266 ierr = PetscMalloc((nt*bs+1)*sizeof(PetscInt),&uglyrmapo);CHKERRQ(ierr); 267 for (i=0; i<inA->rbmapping->n; i++) { 268 if (r_rmapo[i]){ 269 for (j=0; j<bs; j++) { 270 uglyrmapo[(r_rmapo[i]-1)*bs+j] = i*bs + j; 271 } 272 } 273 } 274 ierr = PetscFree(r_rmapo);CHKERRQ(ierr); 275 ierr = VecCreateSeq(PETSC_COMM_SELF,nt*bs,&uglyoo);CHKERRQ(ierr); 276 277 PetscFunctionReturn(0); 278 } 279 280 #undef __FUNCT__ 281 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocal" 282 PetscErrorCode MatMPIBAIJDiagonalScaleLocal(Mat A,Vec scale) 283 { 284 /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */ 285 PetscErrorCode ierr; 286 287 PetscFunctionBegin; 288 ierr = PetscTryMethod(A,"MatDiagonalScaleLocal_C",(Mat,Vec),(A,scale));CHKERRQ(ierr); 289 PetscFunctionReturn(0); 290 } 291 292 EXTERN_C_BEGIN 293 #undef __FUNCT__ 294 #define __FUNCT__ "MatDiagonalScaleLocal_MPIBAIJ" 295 PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat A,Vec scale) 296 { 297 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; /*access private part of matrix */ 298 PetscErrorCode ierr; 299 PetscInt n,i; 300 PetscScalar *d,*o,*s; 301 302 PetscFunctionBegin; 303 if (!uglyrmapd) { 304 ierr = MatMPIBAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr); 305 } 306 307 ierr = VecGetArray(scale,&s);CHKERRQ(ierr); 308 309 ierr = VecGetLocalSize(uglydd,&n);CHKERRQ(ierr); 310 ierr = VecGetArray(uglydd,&d);CHKERRQ(ierr); 311 for (i=0; i<n; i++) { 312 d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ 313 } 314 ierr = VecRestoreArray(uglydd,&d);CHKERRQ(ierr); 315 /* column scale "diagonal" portion of local matrix */ 316 ierr = MatDiagonalScale(a->A,PETSC_NULL,uglydd);CHKERRQ(ierr); 317 318 ierr = VecGetLocalSize(uglyoo,&n);CHKERRQ(ierr); 319 ierr = VecGetArray(uglyoo,&o);CHKERRQ(ierr); 320 for (i=0; i<n; i++) { 321 o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ 322 } 323 ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr); 324 ierr = VecRestoreArray(uglyoo,&o);CHKERRQ(ierr); 325 /* column scale "off-diagonal" portion of local matrix */ 326 ierr = MatDiagonalScale(a->B,PETSC_NULL,uglyoo);CHKERRQ(ierr); 327 328 PetscFunctionReturn(0); 329 } 330 EXTERN_C_END 331 332 333