1 2 /* 3 Support for the parallel BAIJ matrix vector multiply 4 */ 5 #include "../src/mat/impls/baij/mpi/mpibaij.h" 6 7 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatSetUpMultiply_MPIBAIJ" 11 PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat) 12 { 13 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 14 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(baij->B->data); 15 PetscErrorCode ierr; 16 PetscInt i,j,*aj = B->j,ec = 0,*garray; 17 PetscInt bs = mat->rmap->bs,*stmp; 18 IS from,to; 19 Vec gvec; 20 #if defined (PETSC_USE_CTABLE) 21 PetscTable gid1_lid1; 22 PetscTablePosition tpos; 23 PetscInt gid,lid; 24 #else 25 PetscInt Nbs = baij->Nbs,*indices; 26 #endif 27 28 PetscFunctionBegin; 29 30 #if defined (PETSC_USE_CTABLE) 31 /* use a table - Mark Adams */ 32 ierr = PetscTableCreate(B->mbs,&gid1_lid1);CHKERRQ(ierr); 33 for (i=0; i<B->mbs; i++) { 34 for (j=0; j<B->ilen[i]; j++) { 35 PetscInt data,gid1 = aj[B->i[i]+j] + 1; 36 ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr); 37 if (!data) { 38 /* one based table */ 39 ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr); 40 } 41 } 42 } 43 /* form array of columns we need */ 44 ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 45 ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 46 while (tpos) { 47 ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 48 gid--; lid--; 49 garray[lid] = gid; 50 } 51 ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); 52 ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 53 for (i=0; i<ec; i++) { 54 ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr); 55 } 56 /* compact out the extra columns in B */ 57 for (i=0; i<B->mbs; i++) { 58 for (j=0; j<B->ilen[i]; j++) { 59 PetscInt gid1 = aj[B->i[i] + j] + 1; 60 ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 61 lid --; 62 aj[B->i[i]+j] = lid; 63 } 64 } 65 B->nbs = ec; 66 baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs; 67 ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr); 68 ierr = PetscTableDestroy(gid1_lid1);CHKERRQ(ierr); 69 /* Mark Adams */ 70 #else 71 /* Make an array as long as the number of columns */ 72 /* mark those columns that are in baij->B */ 73 ierr = PetscMalloc((Nbs+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr); 74 ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr); 75 for (i=0; i<B->mbs; i++) { 76 for (j=0; j<B->ilen[i]; j++) { 77 if (!indices[aj[B->i[i] + j]]) ec++; 78 indices[aj[B->i[i] + j]] = 1; 79 } 80 } 81 82 /* form array of columns we need */ 83 ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 84 ec = 0; 85 for (i=0; i<Nbs; i++) { 86 if (indices[i]) { 87 garray[ec++] = i; 88 } 89 } 90 91 /* make indices now point into garray */ 92 for (i=0; i<ec; i++) { 93 indices[garray[i]] = i; 94 } 95 96 /* compact out the extra columns in B */ 97 for (i=0; i<B->mbs; i++) { 98 for (j=0; j<B->ilen[i]; j++) { 99 aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 100 } 101 } 102 B->nbs = ec; 103 baij->B->cmap->n =baij->B->cmap->N = ec*mat->rmap->bs; 104 ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr); 105 ierr = PetscFree(indices);CHKERRQ(ierr); 106 #endif 107 108 /* create local vector that is used to scatter into */ 109 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr); 110 111 /* create two temporary index sets for building scatter-gather */ 112 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 113 114 ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&stmp);CHKERRQ(ierr); 115 for (i=0; i<ec; i++) { stmp[i] = i; } 116 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_OWN_POINTER,&to);CHKERRQ(ierr); 117 118 /* create temporary global vector to generate scatter context */ 119 ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr); 120 121 ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr); 122 123 ierr = PetscLogObjectParent(mat,baij->Mvctx);CHKERRQ(ierr); 124 ierr = PetscLogObjectParent(mat,baij->lvec);CHKERRQ(ierr); 125 ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr); 126 ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr); 127 baij->garray = garray; 128 ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr); 129 ierr = ISDestroy(from);CHKERRQ(ierr); 130 ierr = ISDestroy(to);CHKERRQ(ierr); 131 ierr = VecDestroy(gvec);CHKERRQ(ierr); 132 PetscFunctionReturn(0); 133 } 134 135 /* 136 Takes the local part of an already assembled MPIBAIJ matrix 137 and disassembles it. This is to allow new nonzeros into the matrix 138 that require more communication in the matrix vector multiply. 139 Thus certain data-structures must be rebuilt. 140 141 Kind of slow! But that's what application programmers get when 142 they are sloppy. 143 */ 144 #undef __FUNCT__ 145 #define __FUNCT__ "DisAssemble_MPIBAIJ" 146 PetscErrorCode DisAssemble_MPIBAIJ(Mat A) 147 { 148 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 149 Mat B = baij->B,Bnew; 150 Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 151 PetscErrorCode ierr; 152 PetscInt i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray; 153 PetscInt bs2 = baij->bs2,*nz,ec,m = A->rmap->n; 154 MatScalar *a = Bbaij->a; 155 MatScalar *atmp; 156 157 158 PetscFunctionBegin; 159 /* free stuff related to matrix-vec multiply */ 160 ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 161 ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0; 162 ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0; 163 if (baij->colmap) { 164 #if defined (PETSC_USE_CTABLE) 165 ierr = PetscTableDestroy(baij->colmap); baij->colmap = 0;CHKERRQ(ierr); 166 #else 167 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 168 ierr = PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr); 169 #endif 170 } 171 172 /* make sure that B is assembled so we can access its values */ 173 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 174 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 175 176 /* invent new B and copy stuff over */ 177 ierr = PetscMalloc(mbs*sizeof(PetscInt),&nz);CHKERRQ(ierr); 178 for (i=0; i<mbs; i++) { 179 nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 180 } 181 ierr = MatCreate(((PetscObject)B)->comm,&Bnew);CHKERRQ(ierr); 182 ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr); 183 ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr); 184 ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr); 185 ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 186 187 for (i=0; i<mbs; i++) { 188 for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) { 189 col = garray[Bbaij->j[j]]; 190 atmp = a + j*bs2; 191 ierr = MatSetValuesBlocked_SeqBAIJ(Bnew,1,&i,1,&col,atmp,B->insertmode);CHKERRQ(ierr); 192 } 193 } 194 ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 195 196 ierr = PetscFree(nz);CHKERRQ(ierr); 197 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 198 ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr); 199 ierr = MatDestroy(B);CHKERRQ(ierr); 200 ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr); 201 baij->B = Bnew; 202 A->was_assembled = PETSC_FALSE; 203 A->assembled = PETSC_FALSE; 204 PetscFunctionReturn(0); 205 } 206 207 /* ugly stuff added for Glenn someday we should fix this up */ 208 209 static PetscInt *uglyrmapd = 0,*uglyrmapo = 0; /* mapping from the local ordering to the "diagonal" and "off-diagonal" 210 parts of the local matrix */ 211 static Vec uglydd = 0,uglyoo = 0; /* work vectors used to scale the two parts of the local matrix */ 212 213 214 #undef __FUNCT__ 215 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocalSetUp" 216 PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale) 217 { 218 Mat_MPIBAIJ *ina = (Mat_MPIBAIJ*) inA->data; /*access private part of matrix */ 219 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)ina->B->data; 220 PetscErrorCode ierr; 221 PetscInt bs = inA->rmap->bs,i,n,nt,j,cstart,cend,no,*garray = ina->garray,*lindices; 222 PetscInt *r_rmapd,*r_rmapo; 223 224 PetscFunctionBegin; 225 ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr); 226 ierr = MatGetSize(ina->A,PETSC_NULL,&n);CHKERRQ(ierr); 227 ierr = PetscMalloc((inA->rbmapping->n+1)*sizeof(PetscInt),&r_rmapd);CHKERRQ(ierr); 228 ierr = PetscMemzero(r_rmapd,inA->rbmapping->n*sizeof(PetscInt));CHKERRQ(ierr); 229 nt = 0; 230 for (i=0; i<inA->rbmapping->n; i++) { 231 if (inA->rbmapping->indices[i]*bs >= cstart && inA->rbmapping->indices[i]*bs < cend) { 232 nt++; 233 r_rmapd[i] = inA->rbmapping->indices[i] + 1; 234 } 235 } 236 if (nt*bs != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt*bs %D n %D",nt*bs,n); 237 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&uglyrmapd);CHKERRQ(ierr); 238 for (i=0; i<inA->rbmapping->n; i++) { 239 if (r_rmapd[i]){ 240 for (j=0; j<bs; j++) { 241 uglyrmapd[(r_rmapd[i]-1)*bs+j-cstart] = i*bs + j; 242 } 243 } 244 } 245 ierr = PetscFree(r_rmapd);CHKERRQ(ierr); 246 ierr = VecCreateSeq(PETSC_COMM_SELF,n,&uglydd);CHKERRQ(ierr); 247 248 ierr = PetscMalloc((ina->Nbs+1)*sizeof(PetscInt),&lindices);CHKERRQ(ierr); 249 ierr = PetscMemzero(lindices,ina->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 250 for (i=0; i<B->nbs; i++) { 251 lindices[garray[i]] = i+1; 252 } 253 no = inA->rbmapping->n - nt; 254 ierr = PetscMalloc((inA->rbmapping->n+1)*sizeof(PetscInt),&r_rmapo);CHKERRQ(ierr); 255 ierr = PetscMemzero(r_rmapo,inA->rbmapping->n*sizeof(PetscInt));CHKERRQ(ierr); 256 nt = 0; 257 for (i=0; i<inA->rbmapping->n; i++) { 258 if (lindices[inA->rbmapping->indices[i]]) { 259 nt++; 260 r_rmapo[i] = lindices[inA->rbmapping->indices[i]]; 261 } 262 } 263 if (nt > no) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,n); 264 ierr = PetscFree(lindices);CHKERRQ(ierr); 265 ierr = PetscMalloc((nt*bs+1)*sizeof(PetscInt),&uglyrmapo);CHKERRQ(ierr); 266 for (i=0; i<inA->rbmapping->n; i++) { 267 if (r_rmapo[i]){ 268 for (j=0; j<bs; j++) { 269 uglyrmapo[(r_rmapo[i]-1)*bs+j] = i*bs + j; 270 } 271 } 272 } 273 ierr = PetscFree(r_rmapo);CHKERRQ(ierr); 274 ierr = VecCreateSeq(PETSC_COMM_SELF,nt*bs,&uglyoo);CHKERRQ(ierr); 275 276 PetscFunctionReturn(0); 277 } 278 279 #undef __FUNCT__ 280 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocal" 281 PetscErrorCode MatMPIBAIJDiagonalScaleLocal(Mat A,Vec scale) 282 { 283 /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */ 284 PetscErrorCode ierr; 285 286 PetscFunctionBegin; 287 ierr = PetscTryMethod(A,"MatDiagonalScaleLocal_C",(Mat,Vec),(A,scale));CHKERRQ(ierr); 288 PetscFunctionReturn(0); 289 } 290 291 EXTERN_C_BEGIN 292 #undef __FUNCT__ 293 #define __FUNCT__ "MatDiagonalScaleLocal_MPIBAIJ" 294 PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat A,Vec scale) 295 { 296 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; /*access private part of matrix */ 297 PetscErrorCode ierr; 298 PetscInt n,i; 299 PetscScalar *d,*o,*s; 300 301 PetscFunctionBegin; 302 if (!uglyrmapd) { 303 ierr = MatMPIBAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr); 304 } 305 306 ierr = VecGetArray(scale,&s);CHKERRQ(ierr); 307 308 ierr = VecGetLocalSize(uglydd,&n);CHKERRQ(ierr); 309 ierr = VecGetArray(uglydd,&d);CHKERRQ(ierr); 310 for (i=0; i<n; i++) { 311 d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ 312 } 313 ierr = VecRestoreArray(uglydd,&d);CHKERRQ(ierr); 314 /* column scale "diagonal" portion of local matrix */ 315 ierr = MatDiagonalScale(a->A,PETSC_NULL,uglydd);CHKERRQ(ierr); 316 317 ierr = VecGetLocalSize(uglyoo,&n);CHKERRQ(ierr); 318 ierr = VecGetArray(uglyoo,&o);CHKERRQ(ierr); 319 for (i=0; i<n; i++) { 320 o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ 321 } 322 ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr); 323 ierr = VecRestoreArray(uglyoo,&o);CHKERRQ(ierr); 324 /* column scale "off-diagonal" portion of local matrix */ 325 ierr = MatDiagonalScale(a->B,PETSC_NULL,uglyoo);CHKERRQ(ierr); 326 327 PetscFunctionReturn(0); 328 } 329 EXTERN_C_END 330 331 332