1 2 /* 3 Support for the parallel BAIJ matrix vector multiply 4 */ 5 #include <../src/mat/impls/baij/mpi/mpibaij.h> 6 7 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatSetUpMultiply_MPIBAIJ" 11 PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat) 12 { 13 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 14 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(baij->B->data); 15 PetscErrorCode ierr; 16 PetscInt i,j,*aj = B->j,ec = 0,*garray; 17 PetscInt bs = mat->rmap->bs,*stmp; 18 IS from,to; 19 Vec gvec; 20 #if defined (PETSC_USE_CTABLE) 21 PetscTable gid1_lid1; 22 PetscTablePosition tpos; 23 PetscInt gid,lid; 24 #else 25 PetscInt Nbs = baij->Nbs,*indices; 26 #endif 27 28 PetscFunctionBegin; 29 30 #if defined (PETSC_USE_CTABLE) 31 /* use a table - Mark Adams */ 32 ierr = PetscTableCreate(B->mbs,baij->Nbs+1,&gid1_lid1);CHKERRQ(ierr); 33 for (i=0; i<B->mbs; i++) { 34 for (j=0; j<B->ilen[i]; j++) { 35 PetscInt data,gid1 = aj[B->i[i]+j] + 1; 36 ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr); 37 if (!data) { 38 /* one based table */ 39 ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr); 40 } 41 } 42 } 43 /* form array of columns we need */ 44 ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 45 ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 46 while (tpos) { 47 ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 48 gid--; lid--; 49 garray[lid] = gid; 50 } 51 ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); 52 ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 53 for (i=0; i<ec; i++) { 54 ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr); 55 } 56 /* compact out the extra columns in B */ 57 for (i=0; i<B->mbs; i++) { 58 for (j=0; j<B->ilen[i]; j++) { 59 PetscInt gid1 = aj[B->i[i] + j] + 1; 60 ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 61 lid --; 62 aj[B->i[i]+j] = lid; 63 } 64 } 65 B->nbs = ec; 66 baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs; 67 ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr); 68 ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr); 69 #else 70 /* Make an array as long as the number of columns */ 71 /* mark those columns that are in baij->B */ 72 ierr = PetscMalloc((Nbs+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr); 73 ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr); 74 for (i=0; i<B->mbs; i++) { 75 for (j=0; j<B->ilen[i]; j++) { 76 if (!indices[aj[B->i[i] + j]]) ec++; 77 indices[aj[B->i[i] + j]] = 1; 78 } 79 } 80 81 /* form array of columns we need */ 82 ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 83 ec = 0; 84 for (i=0; i<Nbs; i++) { 85 if (indices[i]) { 86 garray[ec++] = i; 87 } 88 } 89 90 /* make indices now point into garray */ 91 for (i=0; i<ec; i++) { 92 indices[garray[i]] = i; 93 } 94 95 /* compact out the extra columns in B */ 96 for (i=0; i<B->mbs; i++) { 97 for (j=0; j<B->ilen[i]; j++) { 98 aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 99 } 100 } 101 B->nbs = ec; 102 baij->B->cmap->n =baij->B->cmap->N = ec*mat->rmap->bs; 103 ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr); 104 ierr = PetscFree(indices);CHKERRQ(ierr); 105 #endif 106 107 /* create local vector that is used to scatter into */ 108 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr); 109 110 /* create two temporary index sets for building scatter-gather */ 111 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 112 113 ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&stmp);CHKERRQ(ierr); 114 for (i=0; i<ec; i++) { stmp[i] = i; } 115 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_OWN_POINTER,&to);CHKERRQ(ierr); 116 117 /* create temporary global vector to generate scatter context */ 118 ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr); 119 120 ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr); 121 122 ierr = PetscLogObjectParent(mat,baij->Mvctx);CHKERRQ(ierr); 123 ierr = PetscLogObjectParent(mat,baij->lvec);CHKERRQ(ierr); 124 ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr); 125 ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr); 126 baij->garray = garray; 127 ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr); 128 ierr = ISDestroy(&from);CHKERRQ(ierr); 129 ierr = ISDestroy(&to);CHKERRQ(ierr); 130 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 131 PetscFunctionReturn(0); 132 } 133 134 /* 135 Takes the local part of an already assembled MPIBAIJ matrix 136 and disassembles it. This is to allow new nonzeros into the matrix 137 that require more communication in the matrix vector multiply. 138 Thus certain data-structures must be rebuilt. 139 140 Kind of slow! But that's what application programmers get when 141 they are sloppy. 142 */ 143 #undef __FUNCT__ 144 #define __FUNCT__ "DisAssemble_MPIBAIJ" 145 PetscErrorCode DisAssemble_MPIBAIJ(Mat A) 146 { 147 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 148 Mat B = baij->B,Bnew; 149 Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 150 PetscErrorCode ierr; 151 PetscInt i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray; 152 PetscInt bs2 = baij->bs2,*nz,ec,m = A->rmap->n; 153 MatScalar *a = Bbaij->a; 154 MatScalar *atmp; 155 156 157 PetscFunctionBegin; 158 /* free stuff related to matrix-vec multiply */ 159 ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 160 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); baij->lvec = 0; 161 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0; 162 if (baij->colmap) { 163 #if defined (PETSC_USE_CTABLE) 164 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 165 #else 166 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 167 ierr = PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr); 168 #endif 169 } 170 171 /* make sure that B is assembled so we can access its values */ 172 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 173 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 174 175 /* invent new B and copy stuff over */ 176 ierr = PetscMalloc(mbs*sizeof(PetscInt),&nz);CHKERRQ(ierr); 177 for (i=0; i<mbs; i++) { 178 nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 179 } 180 ierr = MatCreate(((PetscObject)B)->comm,&Bnew);CHKERRQ(ierr); 181 ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr); 182 ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr); 183 ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr); 184 ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 185 186 for (i=0; i<mbs; i++) { 187 for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) { 188 col = garray[Bbaij->j[j]]; 189 atmp = a + j*bs2; 190 ierr = MatSetValuesBlocked_SeqBAIJ(Bnew,1,&i,1,&col,atmp,B->insertmode);CHKERRQ(ierr); 191 } 192 } 193 ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 194 195 ierr = PetscFree(nz);CHKERRQ(ierr); 196 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 197 ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr); 198 ierr = MatDestroy(&B);CHKERRQ(ierr); 199 ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr); 200 baij->B = Bnew; 201 A->was_assembled = PETSC_FALSE; 202 A->assembled = PETSC_FALSE; 203 PetscFunctionReturn(0); 204 } 205 206 /* ugly stuff added for Glenn someday we should fix this up */ 207 208 static PetscInt *uglyrmapd = 0,*uglyrmapo = 0; /* mapping from the local ordering to the "diagonal" and "off-diagonal" 209 parts of the local matrix */ 210 static Vec uglydd = 0,uglyoo = 0; /* work vectors used to scale the two parts of the local matrix */ 211 212 213 #undef __FUNCT__ 214 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocalSetUp" 215 PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale) 216 { 217 Mat_MPIBAIJ *ina = (Mat_MPIBAIJ*) inA->data; /*access private part of matrix */ 218 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)ina->B->data; 219 PetscErrorCode ierr; 220 PetscInt bs = inA->rmap->bs,i,n,nt,j,cstart,cend,no,*garray = ina->garray,*lindices; 221 PetscInt *r_rmapd,*r_rmapo; 222 223 PetscFunctionBegin; 224 ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr); 225 ierr = MatGetSize(ina->A,PETSC_NULL,&n);CHKERRQ(ierr); 226 ierr = PetscMalloc((inA->rmap->bmapping->n+1)*sizeof(PetscInt),&r_rmapd);CHKERRQ(ierr); 227 ierr = PetscMemzero(r_rmapd,inA->rmap->bmapping->n*sizeof(PetscInt));CHKERRQ(ierr); 228 nt = 0; 229 for (i=0; i<inA->rmap->bmapping->n; i++) { 230 if (inA->rmap->bmapping->indices[i]*bs >= cstart && inA->rmap->bmapping->indices[i]*bs < cend) { 231 nt++; 232 r_rmapd[i] = inA->rmap->bmapping->indices[i] + 1; 233 } 234 } 235 if (nt*bs != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt*bs %D n %D",nt*bs,n); 236 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&uglyrmapd);CHKERRQ(ierr); 237 for (i=0; i<inA->rmap->bmapping->n; i++) { 238 if (r_rmapd[i]){ 239 for (j=0; j<bs; j++) { 240 uglyrmapd[(r_rmapd[i]-1)*bs+j-cstart] = i*bs + j; 241 } 242 } 243 } 244 ierr = PetscFree(r_rmapd);CHKERRQ(ierr); 245 ierr = VecCreateSeq(PETSC_COMM_SELF,n,&uglydd);CHKERRQ(ierr); 246 247 ierr = PetscMalloc((ina->Nbs+1)*sizeof(PetscInt),&lindices);CHKERRQ(ierr); 248 ierr = PetscMemzero(lindices,ina->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 249 for (i=0; i<B->nbs; i++) { 250 lindices[garray[i]] = i+1; 251 } 252 no = inA->rmap->bmapping->n - nt; 253 ierr = PetscMalloc((inA->rmap->bmapping->n+1)*sizeof(PetscInt),&r_rmapo);CHKERRQ(ierr); 254 ierr = PetscMemzero(r_rmapo,inA->rmap->bmapping->n*sizeof(PetscInt));CHKERRQ(ierr); 255 nt = 0; 256 for (i=0; i<inA->rmap->bmapping->n; i++) { 257 if (lindices[inA->rmap->bmapping->indices[i]]) { 258 nt++; 259 r_rmapo[i] = lindices[inA->rmap->bmapping->indices[i]]; 260 } 261 } 262 if (nt > no) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,n); 263 ierr = PetscFree(lindices);CHKERRQ(ierr); 264 ierr = PetscMalloc((nt*bs+1)*sizeof(PetscInt),&uglyrmapo);CHKERRQ(ierr); 265 for (i=0; i<inA->rmap->bmapping->n; i++) { 266 if (r_rmapo[i]){ 267 for (j=0; j<bs; j++) { 268 uglyrmapo[(r_rmapo[i]-1)*bs+j] = i*bs + j; 269 } 270 } 271 } 272 ierr = PetscFree(r_rmapo);CHKERRQ(ierr); 273 ierr = VecCreateSeq(PETSC_COMM_SELF,nt*bs,&uglyoo);CHKERRQ(ierr); 274 275 PetscFunctionReturn(0); 276 } 277 278 #undef __FUNCT__ 279 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocal" 280 PetscErrorCode MatMPIBAIJDiagonalScaleLocal(Mat A,Vec scale) 281 { 282 /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */ 283 PetscErrorCode ierr; 284 285 PetscFunctionBegin; 286 ierr = PetscTryMethod(A,"MatDiagonalScaleLocal_C",(Mat,Vec),(A,scale));CHKERRQ(ierr); 287 PetscFunctionReturn(0); 288 } 289 290 EXTERN_C_BEGIN 291 #undef __FUNCT__ 292 #define __FUNCT__ "MatDiagonalScaleLocal_MPIBAIJ" 293 PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat A,Vec scale) 294 { 295 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; /*access private part of matrix */ 296 PetscErrorCode ierr; 297 PetscInt n,i; 298 PetscScalar *d,*o,*s; 299 300 PetscFunctionBegin; 301 if (!uglyrmapd) { 302 ierr = MatMPIBAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr); 303 } 304 305 ierr = VecGetArray(scale,&s);CHKERRQ(ierr); 306 307 ierr = VecGetLocalSize(uglydd,&n);CHKERRQ(ierr); 308 ierr = VecGetArray(uglydd,&d);CHKERRQ(ierr); 309 for (i=0; i<n; i++) { 310 d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ 311 } 312 ierr = VecRestoreArray(uglydd,&d);CHKERRQ(ierr); 313 /* column scale "diagonal" portion of local matrix */ 314 ierr = MatDiagonalScale(a->A,PETSC_NULL,uglydd);CHKERRQ(ierr); 315 316 ierr = VecGetLocalSize(uglyoo,&n);CHKERRQ(ierr); 317 ierr = VecGetArray(uglyoo,&o);CHKERRQ(ierr); 318 for (i=0; i<n; i++) { 319 o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ 320 } 321 ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr); 322 ierr = VecRestoreArray(uglyoo,&o);CHKERRQ(ierr); 323 /* column scale "off-diagonal" portion of local matrix */ 324 ierr = MatDiagonalScale(a->B,PETSC_NULL,uglyoo);CHKERRQ(ierr); 325 326 PetscFunctionReturn(0); 327 } 328 EXTERN_C_END 329 330 331