1 #define PETSCMAT_DLL 2 3 /* 4 Support for the parallel BAIJ matrix vector multiply 5 */ 6 #include "../src/mat/impls/baij/mpi/mpibaij.h" 7 8 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 9 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatSetUpMultiply_MPIBAIJ" 12 PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat) 13 { 14 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 15 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(baij->B->data); 16 PetscErrorCode ierr; 17 PetscInt i,j,*aj = B->j,ec = 0,*garray; 18 PetscInt bs = mat->rmap->bs,*stmp; 19 IS from,to; 20 Vec gvec; 21 #if defined (PETSC_USE_CTABLE) 22 PetscTable gid1_lid1; 23 PetscTablePosition tpos; 24 PetscInt gid,lid; 25 #else 26 PetscInt Nbs = baij->Nbs,*indices; 27 #endif 28 29 PetscFunctionBegin; 30 31 #if defined (PETSC_USE_CTABLE) 32 /* use a table - Mark Adams */ 33 ierr = PetscTableCreate(B->mbs,&gid1_lid1);CHKERRQ(ierr); 34 for (i=0; i<B->mbs; i++) { 35 for (j=0; j<B->ilen[i]; j++) { 36 PetscInt data,gid1 = aj[B->i[i]+j] + 1; 37 ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr); 38 if (!data) { 39 /* one based table */ 40 ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr); 41 } 42 } 43 } 44 /* form array of columns we need */ 45 ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 46 ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 47 while (tpos) { 48 ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 49 gid--; lid--; 50 garray[lid] = gid; 51 } 52 ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); 53 ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 54 for (i=0; i<ec; i++) { 55 ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr); 56 } 57 /* compact out the extra columns in B */ 58 for (i=0; i<B->mbs; i++) { 59 for (j=0; j<B->ilen[i]; j++) { 60 PetscInt gid1 = aj[B->i[i] + j] + 1; 61 ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 62 lid --; 63 aj[B->i[i]+j] = lid; 64 } 65 } 66 B->nbs = ec; 67 baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs; 68 ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr); 69 ierr = PetscTableDestroy(gid1_lid1);CHKERRQ(ierr); 70 /* Mark Adams */ 71 #else 72 /* Make an array as long as the number of columns */ 73 /* mark those columns that are in baij->B */ 74 ierr = PetscMalloc((Nbs+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr); 75 ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr); 76 for (i=0; i<B->mbs; i++) { 77 for (j=0; j<B->ilen[i]; j++) { 78 if (!indices[aj[B->i[i] + j]]) ec++; 79 indices[aj[B->i[i] + j]] = 1; 80 } 81 } 82 83 /* form array of columns we need */ 84 ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 85 ec = 0; 86 for (i=0; i<Nbs; i++) { 87 if (indices[i]) { 88 garray[ec++] = i; 89 } 90 } 91 92 /* make indices now point into garray */ 93 for (i=0; i<ec; i++) { 94 indices[garray[i]] = i; 95 } 96 97 /* compact out the extra columns in B */ 98 for (i=0; i<B->mbs; i++) { 99 for (j=0; j<B->ilen[i]; j++) { 100 aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 101 } 102 } 103 B->nbs = ec; 104 baij->B->cmap->n =baij->B->cmap->N = ec*mat->rmap->bs; 105 ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr); 106 ierr = PetscFree(indices);CHKERRQ(ierr); 107 #endif 108 109 /* create local vector that is used to scatter into */ 110 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr); 111 112 /* create two temporary index sets for building scatter-gather */ 113 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 114 115 ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&stmp);CHKERRQ(ierr); 116 for (i=0; i<ec; i++) { stmp[i] = i; } 117 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_OWN_POINTER,&to);CHKERRQ(ierr); 118 119 /* create temporary global vector to generate scatter context */ 120 ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr); 121 122 ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr); 123 124 ierr = PetscLogObjectParent(mat,baij->Mvctx);CHKERRQ(ierr); 125 ierr = PetscLogObjectParent(mat,baij->lvec);CHKERRQ(ierr); 126 ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr); 127 ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr); 128 baij->garray = garray; 129 ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr); 130 ierr = ISDestroy(from);CHKERRQ(ierr); 131 ierr = ISDestroy(to);CHKERRQ(ierr); 132 ierr = VecDestroy(gvec);CHKERRQ(ierr); 133 PetscFunctionReturn(0); 134 } 135 136 /* 137 Takes the local part of an already assembled MPIBAIJ matrix 138 and disassembles it. This is to allow new nonzeros into the matrix 139 that require more communication in the matrix vector multiply. 140 Thus certain data-structures must be rebuilt. 141 142 Kind of slow! But that's what application programmers get when 143 they are sloppy. 144 */ 145 #undef __FUNCT__ 146 #define __FUNCT__ "DisAssemble_MPIBAIJ" 147 PetscErrorCode DisAssemble_MPIBAIJ(Mat A) 148 { 149 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 150 Mat B = baij->B,Bnew; 151 Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 152 PetscErrorCode ierr; 153 PetscInt i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray; 154 PetscInt bs2 = baij->bs2,*nz,ec,m = A->rmap->n; 155 MatScalar *a = Bbaij->a; 156 MatScalar *atmp; 157 158 159 PetscFunctionBegin; 160 /* free stuff related to matrix-vec multiply */ 161 ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 162 ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0; 163 ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0; 164 if (baij->colmap) { 165 #if defined (PETSC_USE_CTABLE) 166 ierr = PetscTableDestroy(baij->colmap); baij->colmap = 0;CHKERRQ(ierr); 167 #else 168 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 169 baij->colmap = 0; 170 ierr = PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr); 171 #endif 172 } 173 174 /* make sure that B is assembled so we can access its values */ 175 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 176 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 177 178 /* invent new B and copy stuff over */ 179 ierr = PetscMalloc(mbs*sizeof(PetscInt),&nz);CHKERRQ(ierr); 180 for (i=0; i<mbs; i++) { 181 nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 182 } 183 ierr = MatCreate(((PetscObject)B)->comm,&Bnew);CHKERRQ(ierr); 184 ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr); 185 ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr); 186 ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr); 187 ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 188 189 for (i=0; i<mbs; i++) { 190 for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) { 191 col = garray[Bbaij->j[j]]; 192 atmp = a + j*bs2; 193 ierr = MatSetValuesBlocked_SeqBAIJ(Bnew,1,&i,1,&col,atmp,B->insertmode);CHKERRQ(ierr); 194 } 195 } 196 ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 197 198 ierr = PetscFree(nz);CHKERRQ(ierr); 199 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 200 baij->garray = 0; 201 ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr); 202 ierr = MatDestroy(B);CHKERRQ(ierr); 203 ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr); 204 baij->B = Bnew; 205 A->was_assembled = PETSC_FALSE; 206 A->assembled = PETSC_FALSE; 207 PetscFunctionReturn(0); 208 } 209 210 /* ugly stuff added for Glenn someday we should fix this up */ 211 212 static PetscInt *uglyrmapd = 0,*uglyrmapo = 0; /* mapping from the local ordering to the "diagonal" and "off-diagonal" 213 parts of the local matrix */ 214 static Vec uglydd = 0,uglyoo = 0; /* work vectors used to scale the two parts of the local matrix */ 215 216 217 #undef __FUNCT__ 218 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocalSetUp" 219 PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale) 220 { 221 Mat_MPIBAIJ *ina = (Mat_MPIBAIJ*) inA->data; /*access private part of matrix */ 222 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)ina->B->data; 223 PetscErrorCode ierr; 224 PetscInt bs = inA->rmap->bs,i,n,nt,j,cstart,cend,no,*garray = ina->garray,*lindices; 225 PetscInt *r_rmapd,*r_rmapo; 226 227 PetscFunctionBegin; 228 ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr); 229 ierr = MatGetSize(ina->A,PETSC_NULL,&n);CHKERRQ(ierr); 230 ierr = PetscMalloc((inA->rbmapping->n+1)*sizeof(PetscInt),&r_rmapd);CHKERRQ(ierr); 231 ierr = PetscMemzero(r_rmapd,inA->rbmapping->n*sizeof(PetscInt));CHKERRQ(ierr); 232 nt = 0; 233 for (i=0; i<inA->rbmapping->n; i++) { 234 if (inA->rbmapping->indices[i]*bs >= cstart && inA->rbmapping->indices[i]*bs < cend) { 235 nt++; 236 r_rmapd[i] = inA->rbmapping->indices[i] + 1; 237 } 238 } 239 if (nt*bs != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt*bs %D n %D",nt*bs,n); 240 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&uglyrmapd);CHKERRQ(ierr); 241 for (i=0; i<inA->rbmapping->n; i++) { 242 if (r_rmapd[i]){ 243 for (j=0; j<bs; j++) { 244 uglyrmapd[(r_rmapd[i]-1)*bs+j-cstart] = i*bs + j; 245 } 246 } 247 } 248 ierr = PetscFree(r_rmapd);CHKERRQ(ierr); 249 ierr = VecCreateSeq(PETSC_COMM_SELF,n,&uglydd);CHKERRQ(ierr); 250 251 ierr = PetscMalloc((ina->Nbs+1)*sizeof(PetscInt),&lindices);CHKERRQ(ierr); 252 ierr = PetscMemzero(lindices,ina->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 253 for (i=0; i<B->nbs; i++) { 254 lindices[garray[i]] = i+1; 255 } 256 no = inA->rbmapping->n - nt; 257 ierr = PetscMalloc((inA->rbmapping->n+1)*sizeof(PetscInt),&r_rmapo);CHKERRQ(ierr); 258 ierr = PetscMemzero(r_rmapo,inA->rbmapping->n*sizeof(PetscInt));CHKERRQ(ierr); 259 nt = 0; 260 for (i=0; i<inA->rbmapping->n; i++) { 261 if (lindices[inA->rbmapping->indices[i]]) { 262 nt++; 263 r_rmapo[i] = lindices[inA->rbmapping->indices[i]]; 264 } 265 } 266 if (nt > no) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,n); 267 ierr = PetscFree(lindices);CHKERRQ(ierr); 268 ierr = PetscMalloc((nt*bs+1)*sizeof(PetscInt),&uglyrmapo);CHKERRQ(ierr); 269 for (i=0; i<inA->rbmapping->n; i++) { 270 if (r_rmapo[i]){ 271 for (j=0; j<bs; j++) { 272 uglyrmapo[(r_rmapo[i]-1)*bs+j] = i*bs + j; 273 } 274 } 275 } 276 ierr = PetscFree(r_rmapo);CHKERRQ(ierr); 277 ierr = VecCreateSeq(PETSC_COMM_SELF,nt*bs,&uglyoo);CHKERRQ(ierr); 278 279 PetscFunctionReturn(0); 280 } 281 282 #undef __FUNCT__ 283 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocal" 284 PetscErrorCode MatMPIBAIJDiagonalScaleLocal(Mat A,Vec scale) 285 { 286 /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */ 287 PetscErrorCode ierr; 288 289 PetscFunctionBegin; 290 ierr = PetscTryMethod(A,"MatDiagonalScaleLocal_C",(Mat,Vec),(A,scale));CHKERRQ(ierr); 291 PetscFunctionReturn(0); 292 } 293 294 EXTERN_C_BEGIN 295 #undef __FUNCT__ 296 #define __FUNCT__ "MatDiagonalScaleLocal_MPIBAIJ" 297 PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat A,Vec scale) 298 { 299 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; /*access private part of matrix */ 300 PetscErrorCode ierr; 301 PetscInt n,i; 302 PetscScalar *d,*o,*s; 303 304 PetscFunctionBegin; 305 if (!uglyrmapd) { 306 ierr = MatMPIBAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr); 307 } 308 309 ierr = VecGetArray(scale,&s);CHKERRQ(ierr); 310 311 ierr = VecGetLocalSize(uglydd,&n);CHKERRQ(ierr); 312 ierr = VecGetArray(uglydd,&d);CHKERRQ(ierr); 313 for (i=0; i<n; i++) { 314 d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ 315 } 316 ierr = VecRestoreArray(uglydd,&d);CHKERRQ(ierr); 317 /* column scale "diagonal" portion of local matrix */ 318 ierr = MatDiagonalScale(a->A,PETSC_NULL,uglydd);CHKERRQ(ierr); 319 320 ierr = VecGetLocalSize(uglyoo,&n);CHKERRQ(ierr); 321 ierr = VecGetArray(uglyoo,&o);CHKERRQ(ierr); 322 for (i=0; i<n; i++) { 323 o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ 324 } 325 ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr); 326 ierr = VecRestoreArray(uglyoo,&o);CHKERRQ(ierr); 327 /* column scale "off-diagonal" portion of local matrix */ 328 ierr = MatDiagonalScale(a->B,PETSC_NULL,uglyoo);CHKERRQ(ierr); 329 330 PetscFunctionReturn(0); 331 } 332 EXTERN_C_END 333 334 335