1 #define PETSCMAT_DLL 2 3 /* 4 Support for the parallel BAIJ matrix vector multiply 5 */ 6 #include "../src/mat/impls/baij/mpi/mpibaij.h" 7 8 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 9 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatSetUpMultiply_MPIBAIJ" 12 PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat) 13 { 14 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 15 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(baij->B->data); 16 PetscErrorCode ierr; 17 PetscInt i,j,*aj = B->j,ec = 0,*garray; 18 PetscInt bs = mat->rmap->bs,*stmp; 19 IS from,to; 20 Vec gvec; 21 #if defined (PETSC_USE_CTABLE) 22 PetscTable gid1_lid1; 23 PetscTablePosition tpos; 24 PetscInt gid,lid; 25 #else 26 PetscInt Nbs = baij->Nbs,*indices; 27 #endif 28 29 PetscFunctionBegin; 30 31 #if defined (PETSC_USE_CTABLE) 32 /* use a table - Mark Adams */ 33 ierr = PetscTableCreate(B->mbs,&gid1_lid1);CHKERRQ(ierr); 34 for (i=0; i<B->mbs; i++) { 35 for (j=0; j<B->ilen[i]; j++) { 36 PetscInt data,gid1 = aj[B->i[i]+j] + 1; 37 ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr); 38 if (!data) { 39 /* one based table */ 40 ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr); 41 } 42 } 43 } 44 /* form array of columns we need */ 45 ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 46 ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 47 while (tpos) { 48 ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 49 gid--; lid--; 50 garray[lid] = gid; 51 } 52 ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); 53 ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 54 for (i=0; i<ec; i++) { 55 ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr); 56 } 57 /* compact out the extra columns in B */ 58 for (i=0; i<B->mbs; i++) { 59 for (j=0; j<B->ilen[i]; j++) { 60 PetscInt gid1 = aj[B->i[i] + j] + 1; 61 ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 62 lid --; 63 aj[B->i[i]+j] = lid; 64 } 65 } 66 B->nbs = ec; 67 baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs; 68 ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr); 69 ierr = PetscTableDestroy(gid1_lid1);CHKERRQ(ierr); 70 /* Mark Adams */ 71 #else 72 /* Make an array as long as the number of columns */ 73 /* mark those columns that are in baij->B */ 74 ierr = PetscMalloc((Nbs+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr); 75 ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr); 76 for (i=0; i<B->mbs; i++) { 77 for (j=0; j<B->ilen[i]; j++) { 78 if (!indices[aj[B->i[i] + j]]) ec++; 79 indices[aj[B->i[i] + j]] = 1; 80 } 81 } 82 83 /* form array of columns we need */ 84 ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 85 ec = 0; 86 for (i=0; i<Nbs; i++) { 87 if (indices[i]) { 88 garray[ec++] = i; 89 } 90 } 91 92 /* make indices now point into garray */ 93 for (i=0; i<ec; i++) { 94 indices[garray[i]] = i; 95 } 96 97 /* compact out the extra columns in B */ 98 for (i=0; i<B->mbs; i++) { 99 for (j=0; j<B->ilen[i]; j++) { 100 aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 101 } 102 } 103 B->nbs = ec; 104 baij->B->cmap->n =baij->B->cmap->N = ec*mat->rmap->bs; 105 ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr); 106 ierr = PetscFree(indices);CHKERRQ(ierr); 107 #endif 108 109 /* create local vector that is used to scatter into */ 110 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr); 111 112 /* create two temporary index sets for building scatter-gather */ 113 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 114 115 ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&stmp);CHKERRQ(ierr); 116 for (i=0; i<ec; i++) { stmp[i] = i; } 117 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_OWN_POINTER,&to);CHKERRQ(ierr); 118 119 /* create temporary global vector to generate scatter context */ 120 ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr); 121 122 ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr); 123 124 ierr = PetscLogObjectParent(mat,baij->Mvctx);CHKERRQ(ierr); 125 ierr = PetscLogObjectParent(mat,baij->lvec);CHKERRQ(ierr); 126 ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr); 127 ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr); 128 baij->garray = garray; 129 ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr); 130 ierr = ISDestroy(from);CHKERRQ(ierr); 131 ierr = ISDestroy(to);CHKERRQ(ierr); 132 ierr = VecDestroy(gvec);CHKERRQ(ierr); 133 PetscFunctionReturn(0); 134 } 135 136 /* 137 Takes the local part of an already assembled MPIBAIJ matrix 138 and disassembles it. This is to allow new nonzeros into the matrix 139 that require more communication in the matrix vector multiply. 140 Thus certain data-structures must be rebuilt. 141 142 Kind of slow! But that's what application programmers get when 143 they are sloppy. 144 */ 145 #undef __FUNCT__ 146 #define __FUNCT__ "DisAssemble_MPIBAIJ" 147 PetscErrorCode DisAssemble_MPIBAIJ(Mat A) 148 { 149 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 150 Mat B = baij->B,Bnew; 151 Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 152 PetscErrorCode ierr; 153 PetscInt i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray; 154 PetscInt bs2 = baij->bs2,*nz,ec,m = A->rmap->n; 155 MatScalar *a = Bbaij->a; 156 MatScalar *atmp; 157 158 159 PetscFunctionBegin; 160 /* free stuff related to matrix-vec multiply */ 161 ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 162 ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0; 163 ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0; 164 if (baij->colmap) { 165 #if defined (PETSC_USE_CTABLE) 166 ierr = PetscTableDestroy(baij->colmap); baij->colmap = 0;CHKERRQ(ierr); 167 #else 168 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 169 baij->colmap = 0; 170 ierr = PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr); 171 #endif 172 } 173 174 /* make sure that B is assembled so we can access its values */ 175 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 176 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 177 178 /* invent new B and copy stuff over */ 179 ierr = PetscMalloc(mbs*sizeof(PetscInt),&nz);CHKERRQ(ierr); 180 for (i=0; i<mbs; i++) { 181 nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 182 } 183 ierr = MatCreate(((PetscObject)B)->comm,&Bnew);CHKERRQ(ierr); 184 ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr); 185 ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr); 186 ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr); 187 ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 188 189 for (i=0; i<mbs; i++) { 190 for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) { 191 col = garray[Bbaij->j[j]]; 192 atmp = a + j*bs2; 193 ierr = MatSetValuesBlocked_SeqBAIJ(Bnew,1,&i,1,&col,atmp,B->insertmode);CHKERRQ(ierr); 194 } 195 } 196 ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 197 198 ierr = PetscFree(nz);CHKERRQ(ierr); 199 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 200 baij->garray = 0; 201 ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr); 202 ierr = MatDestroy(B);CHKERRQ(ierr); 203 ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr); 204 baij->B = Bnew; 205 A->was_assembled = PETSC_FALSE; 206 PetscFunctionReturn(0); 207 } 208 209 /* ugly stuff added for Glenn someday we should fix this up */ 210 211 static PetscInt *uglyrmapd = 0,*uglyrmapo = 0; /* mapping from the local ordering to the "diagonal" and "off-diagonal" 212 parts of the local matrix */ 213 static Vec uglydd = 0,uglyoo = 0; /* work vectors used to scale the two parts of the local matrix */ 214 215 216 #undef __FUNCT__ 217 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocalSetUp" 218 PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale) 219 { 220 Mat_MPIBAIJ *ina = (Mat_MPIBAIJ*) inA->data; /*access private part of matrix */ 221 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)ina->B->data; 222 PetscErrorCode ierr; 223 PetscInt bs = inA->rmap->bs,i,n,nt,j,cstart,cend,no,*garray = ina->garray,*lindices; 224 PetscInt *r_rmapd,*r_rmapo; 225 226 PetscFunctionBegin; 227 ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr); 228 ierr = MatGetSize(ina->A,PETSC_NULL,&n);CHKERRQ(ierr); 229 ierr = PetscMalloc((inA->bmapping->n+1)*sizeof(PetscInt),&r_rmapd);CHKERRQ(ierr); 230 ierr = PetscMemzero(r_rmapd,inA->bmapping->n*sizeof(PetscInt));CHKERRQ(ierr); 231 nt = 0; 232 for (i=0; i<inA->bmapping->n; i++) { 233 if (inA->bmapping->indices[i]*bs >= cstart && inA->bmapping->indices[i]*bs < cend) { 234 nt++; 235 r_rmapd[i] = inA->bmapping->indices[i] + 1; 236 } 237 } 238 if (nt*bs != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt*bs %D n %D",nt*bs,n); 239 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&uglyrmapd);CHKERRQ(ierr); 240 for (i=0; i<inA->bmapping->n; i++) { 241 if (r_rmapd[i]){ 242 for (j=0; j<bs; j++) { 243 uglyrmapd[(r_rmapd[i]-1)*bs+j-cstart] = i*bs + j; 244 } 245 } 246 } 247 ierr = PetscFree(r_rmapd);CHKERRQ(ierr); 248 ierr = VecCreateSeq(PETSC_COMM_SELF,n,&uglydd);CHKERRQ(ierr); 249 250 ierr = PetscMalloc((ina->Nbs+1)*sizeof(PetscInt),&lindices);CHKERRQ(ierr); 251 ierr = PetscMemzero(lindices,ina->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 252 for (i=0; i<B->nbs; i++) { 253 lindices[garray[i]] = i+1; 254 } 255 no = inA->bmapping->n - nt; 256 ierr = PetscMalloc((inA->bmapping->n+1)*sizeof(PetscInt),&r_rmapo);CHKERRQ(ierr); 257 ierr = PetscMemzero(r_rmapo,inA->bmapping->n*sizeof(PetscInt));CHKERRQ(ierr); 258 nt = 0; 259 for (i=0; i<inA->bmapping->n; i++) { 260 if (lindices[inA->bmapping->indices[i]]) { 261 nt++; 262 r_rmapo[i] = lindices[inA->bmapping->indices[i]]; 263 } 264 } 265 if (nt > no) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,n); 266 ierr = PetscFree(lindices);CHKERRQ(ierr); 267 ierr = PetscMalloc((nt*bs+1)*sizeof(PetscInt),&uglyrmapo);CHKERRQ(ierr); 268 for (i=0; i<inA->bmapping->n; i++) { 269 if (r_rmapo[i]){ 270 for (j=0; j<bs; j++) { 271 uglyrmapo[(r_rmapo[i]-1)*bs+j] = i*bs + j; 272 } 273 } 274 } 275 ierr = PetscFree(r_rmapo);CHKERRQ(ierr); 276 ierr = VecCreateSeq(PETSC_COMM_SELF,nt*bs,&uglyoo);CHKERRQ(ierr); 277 278 PetscFunctionReturn(0); 279 } 280 281 #undef __FUNCT__ 282 #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocal" 283 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJDiagonalScaleLocal(Mat A,Vec scale) 284 { 285 /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */ 286 PetscErrorCode ierr; 287 288 PetscFunctionBegin; 289 ierr = PetscTryMethod(A,"MatDiagonalScaleLocal_C",(Mat,Vec),(A,scale));CHKERRQ(ierr); 290 PetscFunctionReturn(0); 291 } 292 293 EXTERN_C_BEGIN 294 #undef __FUNCT__ 295 #define __FUNCT__ "MatDiagonalScaleLocal_MPIBAIJ" 296 PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat A,Vec scale) 297 { 298 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; /*access private part of matrix */ 299 PetscErrorCode ierr; 300 PetscInt n,i; 301 PetscScalar *d,*o,*s; 302 303 PetscFunctionBegin; 304 if (!uglyrmapd) { 305 ierr = MatMPIBAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr); 306 } 307 308 ierr = VecGetArray(scale,&s);CHKERRQ(ierr); 309 310 ierr = VecGetLocalSize(uglydd,&n);CHKERRQ(ierr); 311 ierr = VecGetArray(uglydd,&d);CHKERRQ(ierr); 312 for (i=0; i<n; i++) { 313 d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ 314 } 315 ierr = VecRestoreArray(uglydd,&d);CHKERRQ(ierr); 316 /* column scale "diagonal" portion of local matrix */ 317 ierr = MatDiagonalScale(a->A,PETSC_NULL,uglydd);CHKERRQ(ierr); 318 319 ierr = VecGetLocalSize(uglyoo,&n);CHKERRQ(ierr); 320 ierr = VecGetArray(uglyoo,&o);CHKERRQ(ierr); 321 for (i=0; i<n; i++) { 322 o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ 323 } 324 ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr); 325 ierr = VecRestoreArray(uglyoo,&o);CHKERRQ(ierr); 326 /* column scale "off-diagonal" portion of local matrix */ 327 ierr = MatDiagonalScale(a->B,PETSC_NULL,uglyoo);CHKERRQ(ierr); 328 329 PetscFunctionReturn(0); 330 } 331 EXTERN_C_END 332 333 334