1 2 /* 3 Support for the parallel AIJ matrix vector multiply 4 */ 5 #include <../src/mat/impls/aij/mpi/mpiaij.h> 6 #include <petsc/private/vecimpl.h> 7 #include <petsc/private/isimpl.h> /* needed because accesses data structure of ISLocalToGlobalMapping directly */ 8 9 PetscErrorCode MatSetUpMultiply_MPIAIJ(Mat mat) 10 { 11 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 12 Mat_SeqAIJ *B = (Mat_SeqAIJ*)(aij->B->data); 13 PetscErrorCode ierr; 14 PetscInt i,j,*aj = B->j,ec = 0,*garray; 15 IS from,to; 16 Vec gvec; 17 #if defined(PETSC_USE_CTABLE) 18 PetscTable gid1_lid1; 19 PetscTablePosition tpos; 20 PetscInt gid,lid; 21 #else 22 PetscInt N = mat->cmap->N,*indices; 23 #endif 24 25 PetscFunctionBegin; 26 if (!aij->garray) { 27 #if defined(PETSC_USE_CTABLE) 28 /* use a table */ 29 ierr = PetscTableCreate(aij->B->rmap->n,mat->cmap->N+1,&gid1_lid1);CHKERRQ(ierr); 30 for (i=0; i<aij->B->rmap->n; i++) { 31 for (j=0; j<B->ilen[i]; j++) { 32 PetscInt data,gid1 = aj[B->i[i] + j] + 1; 33 ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr); 34 if (!data) { 35 /* one based table */ 36 ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr); 37 } 38 } 39 } 40 /* form array of columns we need */ 41 ierr = PetscMalloc1(ec+1,&garray);CHKERRQ(ierr); 42 ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 43 while (tpos) { 44 ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 45 gid--; 46 lid--; 47 garray[lid] = gid; 48 } 49 ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); /* sort, and rebuild */ 50 ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 51 for (i=0; i<ec; i++) { 52 ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 53 } 54 /* compact out the extra columns in B */ 55 for (i=0; i<aij->B->rmap->n; i++) { 56 for (j=0; j<B->ilen[i]; j++) { 57 PetscInt gid1 = aj[B->i[i] + j] + 1; 58 ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 59 lid--; 60 aj[B->i[i] + j] = lid; 61 } 62 } 63 aij->B->cmap->n = aij->B->cmap->N = ec; 64 aij->B->cmap->bs = 1; 65 66 ierr = PetscLayoutSetUp((aij->B->cmap));CHKERRQ(ierr); 67 ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr); 68 #else 69 /* Make an array as long as the number of columns */ 70 /* mark those columns that are in aij->B */ 71 ierr = PetscCalloc1(N+1,&indices);CHKERRQ(ierr); 72 for (i=0; i<aij->B->rmap->n; i++) { 73 for (j=0; j<B->ilen[i]; j++) { 74 if (!indices[aj[B->i[i] + j]]) ec++; 75 indices[aj[B->i[i] + j]] = 1; 76 } 77 } 78 79 /* form array of columns we need */ 80 ierr = PetscMalloc1(ec+1,&garray);CHKERRQ(ierr); 81 ec = 0; 82 for (i=0; i<N; i++) { 83 if (indices[i]) garray[ec++] = i; 84 } 85 86 /* make indices now point into garray */ 87 for (i=0; i<ec; i++) { 88 indices[garray[i]] = i; 89 } 90 91 /* compact out the extra columns in B */ 92 for (i=0; i<aij->B->rmap->n; i++) { 93 for (j=0; j<B->ilen[i]; j++) { 94 aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 95 } 96 } 97 aij->B->cmap->n = aij->B->cmap->N = ec; 98 aij->B->cmap->bs = 1; 99 100 ierr = PetscLayoutSetUp((aij->B->cmap));CHKERRQ(ierr); 101 ierr = PetscFree(indices);CHKERRQ(ierr); 102 #endif 103 } else { 104 garray = aij->garray; 105 } 106 107 if (!aij->lvec) { 108 /* create local vector that is used to scatter into */ 109 ierr = VecCreateSeq(PETSC_COMM_SELF,ec,&aij->lvec);CHKERRQ(ierr); 110 } else { 111 ierr = VecGetSize(aij->lvec,&ec);CHKERRQ(ierr); 112 } 113 114 /* create two temporary Index sets for build scatter gather */ 115 ierr = ISCreateGeneral(((PetscObject)mat)->comm,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 116 117 ierr = ISCreateStride(PETSC_COMM_SELF,ec,0,1,&to);CHKERRQ(ierr); 118 119 /* create temporary global vector to generate scatter context */ 120 /* This does not allocate the array's memory so is efficient */ 121 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);CHKERRQ(ierr); 122 123 /* generate the scatter context */ 124 if (aij->Mvctx_mpi1_flg) { 125 ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr); 126 ierr = VecScatterCreate_MPI1(gvec,from,aij->lvec,to,&aij->Mvctx_mpi1);CHKERRQ(ierr); 127 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)aij->Mvctx_mpi1);CHKERRQ(ierr); 128 } else { 129 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 130 ierr = VecScatterCreate(gvec,from,aij->lvec,to,&aij->Mvctx);CHKERRQ(ierr); 131 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)aij->Mvctx);CHKERRQ(ierr); 132 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)aij->lvec);CHKERRQ(ierr); 133 ierr = PetscLogObjectMemory((PetscObject)mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr); 134 135 if (!aij->Mvctx->mpi3) { 136 /* MPI1 is used, there is no need to create aij->Mvctx_mpi1 for Mat-Mat ops */ 137 aij->Mvctx_mpi1 = aij->Mvctx; 138 aij->Mvctx_mpi1_flg = PETSC_TRUE; 139 ierr = PetscObjectReference((PetscObject)aij->Mvctx);CHKERRQ(ierr); 140 } 141 } 142 aij->garray = garray; 143 144 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr); 145 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr); 146 147 ierr = ISDestroy(&from);CHKERRQ(ierr); 148 ierr = ISDestroy(&to);CHKERRQ(ierr); 149 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 150 PetscFunctionReturn(0); 151 } 152 153 /* 154 Takes the local part of an already assembled MPIAIJ matrix 155 and disassembles it. This is to allow new nonzeros into the matrix 156 that require more communication in the matrix vector multiply. 157 Thus certain data-structures must be rebuilt. 158 159 Kind of slow! But that's what application programmers get when 160 they are sloppy. 161 */ 162 PetscErrorCode MatDisAssemble_MPIAIJ(Mat A) 163 { 164 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 165 Mat B = aij->B,Bnew; 166 Mat_SeqAIJ *Baij = (Mat_SeqAIJ*)B->data; 167 PetscErrorCode ierr; 168 PetscInt i,j,m = B->rmap->n,n = A->cmap->N,col,ct = 0,*garray = aij->garray,*nz,ec; 169 PetscScalar v; 170 171 PetscFunctionBegin; 172 /* free stuff related to matrix-vec multiply */ 173 ierr = VecGetSize(aij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 174 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 175 if (aij->colmap) { 176 #if defined(PETSC_USE_CTABLE) 177 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 178 #else 179 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 180 ierr = PetscLogObjectMemory((PetscObject)A,-aij->B->cmap->n*sizeof(PetscInt));CHKERRQ(ierr); 181 #endif 182 } 183 184 /* make sure that B is assembled so we can access its values */ 185 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 186 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 187 188 /* invent new B and copy stuff over */ 189 ierr = PetscMalloc1(m+1,&nz);CHKERRQ(ierr); 190 for (i=0; i<m; i++) { 191 nz[i] = Baij->i[i+1] - Baij->i[i]; 192 } 193 ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr); 194 ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr); 195 ierr = MatSetBlockSizesFromMats(Bnew,A,A);CHKERRQ(ierr); 196 ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr); 197 ierr = MatSeqAIJSetPreallocation(Bnew,0,nz);CHKERRQ(ierr); 198 199 ((Mat_SeqAIJ*)Bnew->data)->nonew = Baij->nonew; /* Inherit insertion error options. */ 200 /* 201 Ensure that B's nonzerostate is monotonically increasing. 202 Or should this follow the MatSetValues() loop to preserve B's nonzerstate across a MatDisAssemble() call? 203 */ 204 Bnew->nonzerostate = B->nonzerostate; 205 206 ierr = PetscFree(nz);CHKERRQ(ierr); 207 for (i=0; i<m; i++) { 208 for (j=Baij->i[i]; j<Baij->i[i+1]; j++) { 209 col = garray[Baij->j[ct]]; 210 v = Baij->a[ct++]; 211 ierr = MatSetValues(Bnew,1,&i,1,&col,&v,B->insertmode);CHKERRQ(ierr); 212 } 213 } 214 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 215 ierr = PetscLogObjectMemory((PetscObject)A,-ec*sizeof(PetscInt));CHKERRQ(ierr); 216 ierr = MatDestroy(&B);CHKERRQ(ierr); 217 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)Bnew);CHKERRQ(ierr); 218 219 aij->B = Bnew; 220 A->was_assembled = PETSC_FALSE; 221 PetscFunctionReturn(0); 222 } 223 224 /* ugly stuff added for Glenn someday we should fix this up */ 225 226 static PetscInt *auglyrmapd = 0,*auglyrmapo = 0; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */ 227 static Vec auglydd = 0,auglyoo = 0; /* work vectors used to scale the two parts of the local matrix */ 228 229 230 PetscErrorCode MatMPIAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale) 231 { 232 Mat_MPIAIJ *ina = (Mat_MPIAIJ*) inA->data; /*access private part of matrix */ 233 PetscErrorCode ierr; 234 PetscInt i,n,nt,cstart,cend,no,*garray = ina->garray,*lindices; 235 PetscInt *r_rmapd,*r_rmapo; 236 237 PetscFunctionBegin; 238 ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr); 239 ierr = MatGetSize(ina->A,NULL,&n);CHKERRQ(ierr); 240 ierr = PetscCalloc1(inA->rmap->mapping->n+1,&r_rmapd);CHKERRQ(ierr); 241 nt = 0; 242 for (i=0; i<inA->rmap->mapping->n; i++) { 243 if (inA->rmap->mapping->indices[i] >= cstart && inA->rmap->mapping->indices[i] < cend) { 244 nt++; 245 r_rmapd[i] = inA->rmap->mapping->indices[i] + 1; 246 } 247 } 248 if (nt != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D n %D",nt,n); 249 ierr = PetscMalloc1(n+1,&auglyrmapd);CHKERRQ(ierr); 250 for (i=0; i<inA->rmap->mapping->n; i++) { 251 if (r_rmapd[i]) { 252 auglyrmapd[(r_rmapd[i]-1)-cstart] = i; 253 } 254 } 255 ierr = PetscFree(r_rmapd);CHKERRQ(ierr); 256 ierr = VecCreateSeq(PETSC_COMM_SELF,n,&auglydd);CHKERRQ(ierr); 257 258 ierr = PetscCalloc1(inA->cmap->N+1,&lindices);CHKERRQ(ierr); 259 for (i=0; i<ina->B->cmap->n; i++) { 260 lindices[garray[i]] = i+1; 261 } 262 no = inA->rmap->mapping->n - nt; 263 ierr = PetscCalloc1(inA->rmap->mapping->n+1,&r_rmapo);CHKERRQ(ierr); 264 nt = 0; 265 for (i=0; i<inA->rmap->mapping->n; i++) { 266 if (lindices[inA->rmap->mapping->indices[i]]) { 267 nt++; 268 r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]]; 269 } 270 } 271 if (nt > no) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,n); 272 ierr = PetscFree(lindices);CHKERRQ(ierr); 273 ierr = PetscMalloc1(nt+1,&auglyrmapo);CHKERRQ(ierr); 274 for (i=0; i<inA->rmap->mapping->n; i++) { 275 if (r_rmapo[i]) { 276 auglyrmapo[(r_rmapo[i]-1)] = i; 277 } 278 } 279 ierr = PetscFree(r_rmapo);CHKERRQ(ierr); 280 ierr = VecCreateSeq(PETSC_COMM_SELF,nt,&auglyoo);CHKERRQ(ierr); 281 PetscFunctionReturn(0); 282 } 283 284 PetscErrorCode MatMPIAIJDiagonalScaleLocal(Mat A,Vec scale) 285 { 286 /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */ 287 PetscErrorCode ierr; 288 289 PetscFunctionBegin; 290 ierr = PetscTryMethod(A,"MatDiagonalScaleLocal_C",(Mat,Vec),(A,scale));CHKERRQ(ierr); 291 PetscFunctionReturn(0); 292 } 293 294 PetscErrorCode MatDiagonalScaleLocal_MPIAIJ(Mat A,Vec scale) 295 { 296 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; /*access private part of matrix */ 297 PetscErrorCode ierr; 298 PetscInt n,i; 299 PetscScalar *d,*o; 300 const PetscScalar *s; 301 302 PetscFunctionBegin; 303 if (!auglyrmapd) { 304 ierr = MatMPIAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr); 305 } 306 307 ierr = VecGetArrayRead(scale,&s);CHKERRQ(ierr); 308 309 ierr = VecGetLocalSize(auglydd,&n);CHKERRQ(ierr); 310 ierr = VecGetArray(auglydd,&d);CHKERRQ(ierr); 311 for (i=0; i<n; i++) { 312 d[i] = s[auglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ 313 } 314 ierr = VecRestoreArray(auglydd,&d);CHKERRQ(ierr); 315 /* column scale "diagonal" portion of local matrix */ 316 ierr = MatDiagonalScale(a->A,NULL,auglydd);CHKERRQ(ierr); 317 318 ierr = VecGetLocalSize(auglyoo,&n);CHKERRQ(ierr); 319 ierr = VecGetArray(auglyoo,&o);CHKERRQ(ierr); 320 for (i=0; i<n; i++) { 321 o[i] = s[auglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ 322 } 323 ierr = VecRestoreArrayRead(scale,&s);CHKERRQ(ierr); 324 ierr = VecRestoreArray(auglyoo,&o);CHKERRQ(ierr); 325 /* column scale "off-diagonal" portion of local matrix */ 326 ierr = MatDiagonalScale(a->B,NULL,auglyoo);CHKERRQ(ierr); 327 PetscFunctionReturn(0); 328 } 329