1 2 /* 3 Support for the parallel AIJ matrix vector multiply 4 */ 5 #include <../src/mat/impls/aij/mpi/mpiaij.h> 6 #include <petsc/private/isimpl.h> /* needed because accesses data structure of ISLocalToGlobalMapping directly */ 7 8 PetscErrorCode MatSetUpMultiply_MPIAIJ(Mat mat) 9 { 10 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 11 Mat_SeqAIJ *B = (Mat_SeqAIJ*)(aij->B->data); 12 PetscErrorCode ierr; 13 PetscInt i,j,*aj = B->j,ec = 0,*garray; 14 IS from,to; 15 Vec gvec; 16 #if defined(PETSC_USE_CTABLE) 17 PetscTable gid1_lid1; 18 PetscTablePosition tpos; 19 PetscInt gid,lid; 20 #else 21 PetscInt N = mat->cmap->N,*indices; 22 #endif 23 24 PetscFunctionBegin; 25 if (!aij->garray) { 26 #if defined(PETSC_USE_CTABLE) 27 /* use a table */ 28 ierr = PetscTableCreate(aij->B->rmap->n,mat->cmap->N+1,&gid1_lid1);CHKERRQ(ierr); 29 for (i=0; i<aij->B->rmap->n; i++) { 30 for (j=0; j<B->ilen[i]; j++) { 31 PetscInt data,gid1 = aj[B->i[i] + j] + 1; 32 ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr); 33 if (!data) { 34 /* one based table */ 35 ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr); 36 } 37 } 38 } 39 /* form array of columns we need */ 40 ierr = PetscMalloc1(ec+1,&garray);CHKERRQ(ierr); 41 ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 42 while (tpos) { 43 ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 44 gid--; 45 lid--; 46 garray[lid] = gid; 47 } 48 ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); /* sort, and rebuild */ 49 ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 50 for (i=0; i<ec; i++) { 51 ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 52 } 53 /* compact out the extra columns in B */ 54 for (i=0; i<aij->B->rmap->n; i++) { 55 for (j=0; j<B->ilen[i]; j++) { 56 PetscInt gid1 = aj[B->i[i] + j] + 1; 57 ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 58 lid--; 59 aj[B->i[i] + j] = lid; 60 } 61 } 62 aij->B->cmap->n = aij->B->cmap->N = ec; 63 aij->B->cmap->bs = 1; 64 65 ierr = PetscLayoutSetUp((aij->B->cmap));CHKERRQ(ierr); 66 ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr); 67 #else 68 /* Make an array as long as the number of columns */ 69 /* mark those columns that are in aij->B */ 70 ierr = PetscCalloc1(N+1,&indices);CHKERRQ(ierr); 71 for (i=0; i<aij->B->rmap->n; i++) { 72 for (j=0; j<B->ilen[i]; j++) { 73 if (!indices[aj[B->i[i] + j]]) ec++; 74 indices[aj[B->i[i] + j]] = 1; 75 } 76 } 77 78 /* form array of columns we need */ 79 ierr = PetscMalloc1(ec+1,&garray);CHKERRQ(ierr); 80 ec = 0; 81 for (i=0; i<N; i++) { 82 if (indices[i]) garray[ec++] = i; 83 } 84 85 /* make indices now point into garray */ 86 for (i=0; i<ec; i++) { 87 indices[garray[i]] = i; 88 } 89 90 /* compact out the extra columns in B */ 91 for (i=0; i<aij->B->rmap->n; i++) { 92 for (j=0; j<B->ilen[i]; j++) { 93 aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 94 } 95 } 96 aij->B->cmap->n = aij->B->cmap->N = ec; 97 aij->B->cmap->bs = 1; 98 99 ierr = PetscLayoutSetUp((aij->B->cmap));CHKERRQ(ierr); 100 ierr = PetscFree(indices);CHKERRQ(ierr); 101 #endif 102 } else { 103 garray = aij->garray; 104 } 105 106 if (!aij->lvec) { 107 /* create local vector that is used to scatter into */ 108 ierr = VecCreateSeq(PETSC_COMM_SELF,ec,&aij->lvec);CHKERRQ(ierr); 109 } else { 110 ierr = VecGetSize(aij->lvec,&ec);CHKERRQ(ierr); 111 } 112 113 /* create two temporary Index sets for build scatter gather */ 114 ierr = ISCreateGeneral(((PetscObject)mat)->comm,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 115 116 ierr = ISCreateStride(PETSC_COMM_SELF,ec,0,1,&to);CHKERRQ(ierr); 117 118 /* create temporary global vector to generate scatter context */ 119 /* This does not allocate the array's memory so is efficient */ 120 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);CHKERRQ(ierr); 121 122 /* generate the scatter context */ 123 if (aij->Mvctx_mpi1_flg || mat->mpi1) { 124 if (aij->Mvctx_mpi1) { 125 /* Must destroy existing Mvctx_mpi1, then recreate it. See src/ksp/ksp/examples/tutorials/network/runex1_nest_2 */ 126 ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr); 127 } 128 ierr = VecScatterCreateMPI1(gvec,from,aij->lvec,to,&aij->Mvctx_mpi1);CHKERRQ(ierr); 129 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)aij->Mvctx_mpi1);CHKERRQ(ierr); 130 } else { 131 ierr = VecScatterCreate(gvec,from,aij->lvec,to,&aij->Mvctx);CHKERRQ(ierr); 132 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)aij->Mvctx);CHKERRQ(ierr); 133 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)aij->lvec);CHKERRQ(ierr); 134 ierr = PetscLogObjectMemory((PetscObject)mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr); 135 } 136 aij->garray = garray; 137 138 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr); 139 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr); 140 141 ierr = ISDestroy(&from);CHKERRQ(ierr); 142 ierr = ISDestroy(&to);CHKERRQ(ierr); 143 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 144 PetscFunctionReturn(0); 145 } 146 147 /* 148 Takes the local part of an already assembled MPIAIJ matrix 149 and disassembles it. This is to allow new nonzeros into the matrix 150 that require more communication in the matrix vector multiply. 151 Thus certain data-structures must be rebuilt. 152 153 Kind of slow! But that's what application programmers get when 154 they are sloppy. 155 */ 156 PetscErrorCode MatDisAssemble_MPIAIJ(Mat A) 157 { 158 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 159 Mat B = aij->B,Bnew; 160 Mat_SeqAIJ *Baij = (Mat_SeqAIJ*)B->data; 161 PetscErrorCode ierr; 162 PetscInt i,j,m = B->rmap->n,n = A->cmap->N,col,ct = 0,*garray = aij->garray,*nz,ec; 163 PetscScalar v; 164 165 PetscFunctionBegin; 166 /* free stuff related to matrix-vec multiply */ 167 ierr = VecGetSize(aij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 168 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 169 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 170 if (aij->colmap) { 171 #if defined(PETSC_USE_CTABLE) 172 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 173 #else 174 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 175 ierr = PetscLogObjectMemory((PetscObject)A,-aij->B->cmap->n*sizeof(PetscInt));CHKERRQ(ierr); 176 #endif 177 } 178 179 /* make sure that B is assembled so we can access its values */ 180 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 181 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 182 183 /* invent new B and copy stuff over */ 184 ierr = PetscMalloc1(m+1,&nz);CHKERRQ(ierr); 185 for (i=0; i<m; i++) { 186 nz[i] = Baij->i[i+1] - Baij->i[i]; 187 } 188 ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr); 189 ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr); 190 ierr = MatSetBlockSizesFromMats(Bnew,A,A);CHKERRQ(ierr); 191 ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr); 192 ierr = MatSeqAIJSetPreallocation(Bnew,0,nz);CHKERRQ(ierr); 193 194 ((Mat_SeqAIJ*)Bnew->data)->nonew = Baij->nonew; /* Inherit insertion error options. */ 195 /* 196 Ensure that B's nonzerostate is monotonically increasing. 197 Or should this follow the MatSetValues() loop to preserve B's nonzerstate across a MatDisAssemble() call? 198 */ 199 Bnew->nonzerostate = B->nonzerostate; 200 201 ierr = PetscFree(nz);CHKERRQ(ierr); 202 for (i=0; i<m; i++) { 203 for (j=Baij->i[i]; j<Baij->i[i+1]; j++) { 204 col = garray[Baij->j[ct]]; 205 v = Baij->a[ct++]; 206 ierr = MatSetValues(Bnew,1,&i,1,&col,&v,B->insertmode);CHKERRQ(ierr); 207 } 208 } 209 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 210 ierr = PetscLogObjectMemory((PetscObject)A,-ec*sizeof(PetscInt));CHKERRQ(ierr); 211 ierr = MatDestroy(&B);CHKERRQ(ierr); 212 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)Bnew);CHKERRQ(ierr); 213 214 aij->B = Bnew; 215 A->was_assembled = PETSC_FALSE; 216 PetscFunctionReturn(0); 217 } 218 219 /* ugly stuff added for Glenn someday we should fix this up */ 220 221 static PetscInt *auglyrmapd = 0,*auglyrmapo = 0; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */ 222 static Vec auglydd = 0,auglyoo = 0; /* work vectors used to scale the two parts of the local matrix */ 223 224 225 PetscErrorCode MatMPIAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale) 226 { 227 Mat_MPIAIJ *ina = (Mat_MPIAIJ*) inA->data; /*access private part of matrix */ 228 PetscErrorCode ierr; 229 PetscInt i,n,nt,cstart,cend,no,*garray = ina->garray,*lindices; 230 PetscInt *r_rmapd,*r_rmapo; 231 232 PetscFunctionBegin; 233 ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr); 234 ierr = MatGetSize(ina->A,NULL,&n);CHKERRQ(ierr); 235 ierr = PetscCalloc1(inA->rmap->mapping->n+1,&r_rmapd);CHKERRQ(ierr); 236 nt = 0; 237 for (i=0; i<inA->rmap->mapping->n; i++) { 238 if (inA->rmap->mapping->indices[i] >= cstart && inA->rmap->mapping->indices[i] < cend) { 239 nt++; 240 r_rmapd[i] = inA->rmap->mapping->indices[i] + 1; 241 } 242 } 243 if (nt != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D n %D",nt,n); 244 ierr = PetscMalloc1(n+1,&auglyrmapd);CHKERRQ(ierr); 245 for (i=0; i<inA->rmap->mapping->n; i++) { 246 if (r_rmapd[i]) { 247 auglyrmapd[(r_rmapd[i]-1)-cstart] = i; 248 } 249 } 250 ierr = PetscFree(r_rmapd);CHKERRQ(ierr); 251 ierr = VecCreateSeq(PETSC_COMM_SELF,n,&auglydd);CHKERRQ(ierr); 252 253 ierr = PetscCalloc1(inA->cmap->N+1,&lindices);CHKERRQ(ierr); 254 for (i=0; i<ina->B->cmap->n; i++) { 255 lindices[garray[i]] = i+1; 256 } 257 no = inA->rmap->mapping->n - nt; 258 ierr = PetscCalloc1(inA->rmap->mapping->n+1,&r_rmapo);CHKERRQ(ierr); 259 nt = 0; 260 for (i=0; i<inA->rmap->mapping->n; i++) { 261 if (lindices[inA->rmap->mapping->indices[i]]) { 262 nt++; 263 r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]]; 264 } 265 } 266 if (nt > no) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,n); 267 ierr = PetscFree(lindices);CHKERRQ(ierr); 268 ierr = PetscMalloc1(nt+1,&auglyrmapo);CHKERRQ(ierr); 269 for (i=0; i<inA->rmap->mapping->n; i++) { 270 if (r_rmapo[i]) { 271 auglyrmapo[(r_rmapo[i]-1)] = i; 272 } 273 } 274 ierr = PetscFree(r_rmapo);CHKERRQ(ierr); 275 ierr = VecCreateSeq(PETSC_COMM_SELF,nt,&auglyoo);CHKERRQ(ierr); 276 PetscFunctionReturn(0); 277 } 278 279 PetscErrorCode MatMPIAIJDiagonalScaleLocal(Mat A,Vec scale) 280 { 281 /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */ 282 PetscErrorCode ierr; 283 284 PetscFunctionBegin; 285 ierr = PetscTryMethod(A,"MatDiagonalScaleLocal_C",(Mat,Vec),(A,scale));CHKERRQ(ierr); 286 PetscFunctionReturn(0); 287 } 288 289 PetscErrorCode MatDiagonalScaleLocal_MPIAIJ(Mat A,Vec scale) 290 { 291 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; /*access private part of matrix */ 292 PetscErrorCode ierr; 293 PetscInt n,i; 294 PetscScalar *d,*o; 295 const PetscScalar *s; 296 297 PetscFunctionBegin; 298 if (!auglyrmapd) { 299 ierr = MatMPIAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr); 300 } 301 302 ierr = VecGetArrayRead(scale,&s);CHKERRQ(ierr); 303 304 ierr = VecGetLocalSize(auglydd,&n);CHKERRQ(ierr); 305 ierr = VecGetArray(auglydd,&d);CHKERRQ(ierr); 306 for (i=0; i<n; i++) { 307 d[i] = s[auglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ 308 } 309 ierr = VecRestoreArray(auglydd,&d);CHKERRQ(ierr); 310 /* column scale "diagonal" portion of local matrix */ 311 ierr = MatDiagonalScale(a->A,NULL,auglydd);CHKERRQ(ierr); 312 313 ierr = VecGetLocalSize(auglyoo,&n);CHKERRQ(ierr); 314 ierr = VecGetArray(auglyoo,&o);CHKERRQ(ierr); 315 for (i=0; i<n; i++) { 316 o[i] = s[auglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ 317 } 318 ierr = VecRestoreArrayRead(scale,&s);CHKERRQ(ierr); 319 ierr = VecRestoreArray(auglyoo,&o);CHKERRQ(ierr); 320 /* column scale "off-diagonal" portion of local matrix */ 321 ierr = MatDiagonalScale(a->B,NULL,auglyoo);CHKERRQ(ierr); 322 PetscFunctionReturn(0); 323 } 324