1 2 /* 3 Support for the parallel AIJ matrix vector multiply 4 */ 5 #include <../src/mat/impls/aij/mpi/mpiaij.h> 6 #include <petsc/private/isimpl.h> /* needed because accesses data structure of ISLocalToGlobalMapping directly */ 7 8 PetscErrorCode MatSetUpMultiply_MPIAIJ(Mat mat) 9 { 10 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 11 Mat_SeqAIJ *B = (Mat_SeqAIJ*)(aij->B->data); 12 PetscErrorCode ierr; 13 PetscInt i,j,*aj = B->j,ec = 0,*garray; 14 IS from,to; 15 Vec gvec; 16 #if defined(PETSC_USE_CTABLE) 17 PetscTable gid1_lid1; 18 PetscTablePosition tpos; 19 PetscInt gid,lid; 20 #else 21 PetscInt N = mat->cmap->N,*indices; 22 #endif 23 24 PetscFunctionBegin; 25 if (!aij->garray) { 26 #if defined(PETSC_USE_CTABLE) 27 /* use a table */ 28 ierr = PetscTableCreate(aij->B->rmap->n,mat->cmap->N+1,&gid1_lid1);CHKERRQ(ierr); 29 for (i=0; i<aij->B->rmap->n; i++) { 30 for (j=0; j<B->ilen[i]; j++) { 31 PetscInt data,gid1 = aj[B->i[i] + j] + 1; 32 ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr); 33 if (!data) { 34 /* one based table */ 35 ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr); 36 } 37 } 38 } 39 /* form array of columns we need */ 40 ierr = PetscMalloc1(ec+1,&garray);CHKERRQ(ierr); 41 ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 42 while (tpos) { 43 ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 44 gid--; 45 lid--; 46 garray[lid] = gid; 47 } 48 ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); /* sort, and rebuild */ 49 ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 50 for (i=0; i<ec; i++) { 51 ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 52 } 53 /* compact out the extra columns in B */ 54 for (i=0; i<aij->B->rmap->n; i++) { 55 for (j=0; j<B->ilen[i]; j++) { 56 PetscInt gid1 = aj[B->i[i] + j] + 1; 57 ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 58 lid--; 59 aj[B->i[i] + j] = lid; 60 } 61 } 62 aij->B->cmap->n = aij->B->cmap->N = ec; 63 aij->B->cmap->bs = 1; 64 65 ierr = PetscLayoutSetUp((aij->B->cmap));CHKERRQ(ierr); 66 ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr); 67 #else 68 /* Make an array as long as the number of columns */ 69 /* mark those columns that are in aij->B */ 70 ierr = PetscCalloc1(N+1,&indices);CHKERRQ(ierr); 71 for (i=0; i<aij->B->rmap->n; i++) { 72 for (j=0; j<B->ilen[i]; j++) { 73 if (!indices[aj[B->i[i] + j]]) ec++; 74 indices[aj[B->i[i] + j]] = 1; 75 } 76 } 77 78 /* form array of columns we need */ 79 ierr = PetscMalloc1(ec+1,&garray);CHKERRQ(ierr); 80 ec = 0; 81 for (i=0; i<N; i++) { 82 if (indices[i]) garray[ec++] = i; 83 } 84 85 /* make indices now point into garray */ 86 for (i=0; i<ec; i++) { 87 indices[garray[i]] = i; 88 } 89 90 /* compact out the extra columns in B */ 91 for (i=0; i<aij->B->rmap->n; i++) { 92 for (j=0; j<B->ilen[i]; j++) { 93 aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 94 } 95 } 96 aij->B->cmap->n = aij->B->cmap->N = ec; 97 aij->B->cmap->bs = 1; 98 99 ierr = PetscLayoutSetUp((aij->B->cmap));CHKERRQ(ierr); 100 ierr = PetscFree(indices);CHKERRQ(ierr); 101 #endif 102 } else { 103 garray = aij->garray; 104 } 105 106 if (!aij->lvec) { 107 /* create local vector that is used to scatter into */ 108 ierr = VecCreateSeq(PETSC_COMM_SELF,ec,&aij->lvec);CHKERRQ(ierr); 109 } else { 110 ierr = VecGetSize(aij->lvec,&ec);CHKERRQ(ierr); 111 } 112 113 /* create two temporary Index sets for build scatter gather */ 114 ierr = ISCreateGeneral(((PetscObject)mat)->comm,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 115 116 ierr = ISCreateStride(PETSC_COMM_SELF,ec,0,1,&to);CHKERRQ(ierr); 117 118 /* create temporary global vector to generate scatter context */ 119 /* This does not allocate the array's memory so is efficient */ 120 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);CHKERRQ(ierr); 121 122 /* generate the scatter context */ 123 if (aij->Mvctx_mpi1_flg || mat->mpi1) { 124 if (aij->Mvctx_mpi1) { 125 /* Must destroy existing Mvctx_mpi1, then recreate it. See src/ksp/ksp/examples/tutorials/network/runex1_nest_2 */ 126 ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr); 127 } 128 ierr = VecScatterCreateMPI1(gvec,from,aij->lvec,to,&aij->Mvctx_mpi1);CHKERRQ(ierr); 129 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)aij->Mvctx_mpi1);CHKERRQ(ierr); 130 } else { 131 if (aij->Mvctx) { 132 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 133 } 134 135 ierr = VecScatterCreate(gvec,from,aij->lvec,to,&aij->Mvctx);CHKERRQ(ierr); 136 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)aij->Mvctx);CHKERRQ(ierr); 137 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)aij->lvec);CHKERRQ(ierr); 138 ierr = PetscLogObjectMemory((PetscObject)mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr); 139 } 140 aij->garray = garray; 141 142 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr); 143 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr); 144 145 ierr = ISDestroy(&from);CHKERRQ(ierr); 146 ierr = ISDestroy(&to);CHKERRQ(ierr); 147 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 148 PetscFunctionReturn(0); 149 } 150 151 /* 152 Takes the local part of an already assembled MPIAIJ matrix 153 and disassembles it. This is to allow new nonzeros into the matrix 154 that require more communication in the matrix vector multiply. 155 Thus certain data-structures must be rebuilt. 156 157 Kind of slow! But that's what application programmers get when 158 they are sloppy. 159 */ 160 PetscErrorCode MatDisAssemble_MPIAIJ(Mat A) 161 { 162 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 163 Mat B = aij->B,Bnew; 164 Mat_SeqAIJ *Baij = (Mat_SeqAIJ*)B->data; 165 PetscErrorCode ierr; 166 PetscInt i,j,m = B->rmap->n,n = A->cmap->N,col,ct = 0,*garray = aij->garray,*nz,ec; 167 PetscScalar v; 168 169 PetscFunctionBegin; 170 /* free stuff related to matrix-vec multiply */ 171 ierr = VecGetSize(aij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 172 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 173 if (aij->colmap) { 174 #if defined(PETSC_USE_CTABLE) 175 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 176 #else 177 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 178 ierr = PetscLogObjectMemory((PetscObject)A,-aij->B->cmap->n*sizeof(PetscInt));CHKERRQ(ierr); 179 #endif 180 } 181 182 /* make sure that B is assembled so we can access its values */ 183 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 184 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 185 186 /* invent new B and copy stuff over */ 187 ierr = PetscMalloc1(m+1,&nz);CHKERRQ(ierr); 188 for (i=0; i<m; i++) { 189 nz[i] = Baij->i[i+1] - Baij->i[i]; 190 } 191 ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr); 192 ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr); 193 ierr = MatSetBlockSizesFromMats(Bnew,A,A);CHKERRQ(ierr); 194 ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr); 195 ierr = MatSeqAIJSetPreallocation(Bnew,0,nz);CHKERRQ(ierr); 196 197 ((Mat_SeqAIJ*)Bnew->data)->nonew = Baij->nonew; /* Inherit insertion error options. */ 198 /* 199 Ensure that B's nonzerostate is monotonically increasing. 200 Or should this follow the MatSetValues() loop to preserve B's nonzerstate across a MatDisAssemble() call? 201 */ 202 Bnew->nonzerostate = B->nonzerostate; 203 204 ierr = PetscFree(nz);CHKERRQ(ierr); 205 for (i=0; i<m; i++) { 206 for (j=Baij->i[i]; j<Baij->i[i+1]; j++) { 207 col = garray[Baij->j[ct]]; 208 v = Baij->a[ct++]; 209 ierr = MatSetValues(Bnew,1,&i,1,&col,&v,B->insertmode);CHKERRQ(ierr); 210 } 211 } 212 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 213 ierr = PetscLogObjectMemory((PetscObject)A,-ec*sizeof(PetscInt));CHKERRQ(ierr); 214 ierr = MatDestroy(&B);CHKERRQ(ierr); 215 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)Bnew);CHKERRQ(ierr); 216 217 aij->B = Bnew; 218 A->was_assembled = PETSC_FALSE; 219 PetscFunctionReturn(0); 220 } 221 222 /* ugly stuff added for Glenn someday we should fix this up */ 223 224 static PetscInt *auglyrmapd = 0,*auglyrmapo = 0; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */ 225 static Vec auglydd = 0,auglyoo = 0; /* work vectors used to scale the two parts of the local matrix */ 226 227 228 PetscErrorCode MatMPIAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale) 229 { 230 Mat_MPIAIJ *ina = (Mat_MPIAIJ*) inA->data; /*access private part of matrix */ 231 PetscErrorCode ierr; 232 PetscInt i,n,nt,cstart,cend,no,*garray = ina->garray,*lindices; 233 PetscInt *r_rmapd,*r_rmapo; 234 235 PetscFunctionBegin; 236 ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr); 237 ierr = MatGetSize(ina->A,NULL,&n);CHKERRQ(ierr); 238 ierr = PetscCalloc1(inA->rmap->mapping->n+1,&r_rmapd);CHKERRQ(ierr); 239 nt = 0; 240 for (i=0; i<inA->rmap->mapping->n; i++) { 241 if (inA->rmap->mapping->indices[i] >= cstart && inA->rmap->mapping->indices[i] < cend) { 242 nt++; 243 r_rmapd[i] = inA->rmap->mapping->indices[i] + 1; 244 } 245 } 246 if (nt != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D n %D",nt,n); 247 ierr = PetscMalloc1(n+1,&auglyrmapd);CHKERRQ(ierr); 248 for (i=0; i<inA->rmap->mapping->n; i++) { 249 if (r_rmapd[i]) { 250 auglyrmapd[(r_rmapd[i]-1)-cstart] = i; 251 } 252 } 253 ierr = PetscFree(r_rmapd);CHKERRQ(ierr); 254 ierr = VecCreateSeq(PETSC_COMM_SELF,n,&auglydd);CHKERRQ(ierr); 255 256 ierr = PetscCalloc1(inA->cmap->N+1,&lindices);CHKERRQ(ierr); 257 for (i=0; i<ina->B->cmap->n; i++) { 258 lindices[garray[i]] = i+1; 259 } 260 no = inA->rmap->mapping->n - nt; 261 ierr = PetscCalloc1(inA->rmap->mapping->n+1,&r_rmapo);CHKERRQ(ierr); 262 nt = 0; 263 for (i=0; i<inA->rmap->mapping->n; i++) { 264 if (lindices[inA->rmap->mapping->indices[i]]) { 265 nt++; 266 r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]]; 267 } 268 } 269 if (nt > no) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,n); 270 ierr = PetscFree(lindices);CHKERRQ(ierr); 271 ierr = PetscMalloc1(nt+1,&auglyrmapo);CHKERRQ(ierr); 272 for (i=0; i<inA->rmap->mapping->n; i++) { 273 if (r_rmapo[i]) { 274 auglyrmapo[(r_rmapo[i]-1)] = i; 275 } 276 } 277 ierr = PetscFree(r_rmapo);CHKERRQ(ierr); 278 ierr = VecCreateSeq(PETSC_COMM_SELF,nt,&auglyoo);CHKERRQ(ierr); 279 PetscFunctionReturn(0); 280 } 281 282 PetscErrorCode MatMPIAIJDiagonalScaleLocal(Mat A,Vec scale) 283 { 284 /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */ 285 PetscErrorCode ierr; 286 287 PetscFunctionBegin; 288 ierr = PetscTryMethod(A,"MatDiagonalScaleLocal_C",(Mat,Vec),(A,scale));CHKERRQ(ierr); 289 PetscFunctionReturn(0); 290 } 291 292 PetscErrorCode MatDiagonalScaleLocal_MPIAIJ(Mat A,Vec scale) 293 { 294 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; /*access private part of matrix */ 295 PetscErrorCode ierr; 296 PetscInt n,i; 297 PetscScalar *d,*o; 298 const PetscScalar *s; 299 300 PetscFunctionBegin; 301 if (!auglyrmapd) { 302 ierr = MatMPIAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr); 303 } 304 305 ierr = VecGetArrayRead(scale,&s);CHKERRQ(ierr); 306 307 ierr = VecGetLocalSize(auglydd,&n);CHKERRQ(ierr); 308 ierr = VecGetArray(auglydd,&d);CHKERRQ(ierr); 309 for (i=0; i<n; i++) { 310 d[i] = s[auglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ 311 } 312 ierr = VecRestoreArray(auglydd,&d);CHKERRQ(ierr); 313 /* column scale "diagonal" portion of local matrix */ 314 ierr = MatDiagonalScale(a->A,NULL,auglydd);CHKERRQ(ierr); 315 316 ierr = VecGetLocalSize(auglyoo,&n);CHKERRQ(ierr); 317 ierr = VecGetArray(auglyoo,&o);CHKERRQ(ierr); 318 for (i=0; i<n; i++) { 319 o[i] = s[auglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ 320 } 321 ierr = VecRestoreArrayRead(scale,&s);CHKERRQ(ierr); 322 ierr = VecRestoreArray(auglyoo,&o);CHKERRQ(ierr); 323 /* column scale "off-diagonal" portion of local matrix */ 324 ierr = MatDiagonalScale(a->B,NULL,auglyoo);CHKERRQ(ierr); 325 PetscFunctionReturn(0); 326 } 327