/*
   Support for the parallel AIJ matrix-vector multiply
*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petsc/private/vecimpl.h>
#include <petsc/private/isimpl.h> /* needed because it accesses the data structure of ISLocalToGlobalMapping directly */

PetscErrorCode MatSetUpMultiply_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ *B   = (Mat_SeqAIJ*)(aij->B->data);
  PetscInt   i,j,*aj = B->j,*garray;
  PetscInt   ec = 0; /* number of nonzero external columns */
  IS         from,to;
  Vec        gvec;
#if defined(PETSC_USE_CTABLE)
  PetscTable         gid1_lid1;
  PetscTablePosition tpos;
  PetscInt           gid,lid;
#else
  PetscInt N = mat->cmap->N,*indices;
#endif

  PetscFunctionBegin;
  if (!aij->garray) {
    PetscCheck(aij->B,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing B mat");
#if defined(PETSC_USE_CTABLE)
    /* use a table mapping global column ids (1-based) to local ids (1-based) */
    PetscCall(PetscTableCreate(aij->B->rmap->n,mat->cmap->N+1,&gid1_lid1));
    for (i=0; i<aij->B->rmap->n; i++) {
      for (j=0; j<B->ilen[i]; j++) {
        PetscInt data,gid1 = aj[B->i[i] + j] + 1;
        PetscCall(PetscTableFind(gid1_lid1,gid1,&data));
        if (!data) {
          /* one-based table */
          PetscCall(PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES));
        }
      }
    }
    /* form array of columns we need */
    PetscCall(PetscMalloc1(ec,&garray));
    PetscCall(PetscTableGetHeadPosition(gid1_lid1,&tpos));
    while (tpos) {
      PetscCall(PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid));
      gid--;
      lid--;
      garray[lid] = gid;
    }
    PetscCall(PetscSortInt(ec,garray)); /* sort, then rebuild the table with the sorted ordering */
    PetscCall(PetscTableRemoveAll(gid1_lid1));
    for (i=0; i<ec; i++) {
      PetscCall(PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES));
    }
    /* compact out the extra columns in B */
    for (i=0; i<aij->B->rmap->n; i++) {
      for (j=0; j<B->ilen[i]; j++) {
        PetscInt gid1 = aj[B->i[i] + j] + 1;
        PetscCall(PetscTableFind(gid1_lid1,gid1,&lid));
        lid--;
        aj[B->i[i] + j] = lid;
      }
    }
    PetscCall(PetscLayoutDestroy(&aij->B->cmap));
    PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)aij->B),ec,ec,1,&aij->B->cmap));
    PetscCall(PetscTableDestroy(&gid1_lid1));
#else
    /* Make an array as long as the number of columns */
    /* mark those columns that are in aij->B */
    PetscCall(PetscCalloc1(N,&indices));
    for (i=0; i<aij->B->rmap->n; i++) {
      for (j=0; j<B->ilen[i]; j++) {
        if (!indices[aj[B->i[i] + j]]) ec++;
        indices[aj[B->i[i] + j]] = 1;
      }
    }

    /* form array of columns we need */
    PetscCall(PetscMalloc1(ec,&garray));
    ec = 0;
    for (i=0; i<N; i++) {
      if (indices[i]) garray[ec++] = i;
    }

    /* make indices now point into garray */
    for (i=0; i<ec; i++) {
      indices[garray[i]] = i;
    }

    /* compact out the extra columns in B */
    for (i=0; i<aij->B->rmap->n; i++) {
      for (j=0; j<B->ilen[i]; j++) {
        aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
      }
    }
    PetscCall(PetscLayoutDestroy(&aij->B->cmap));
    PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)aij->B),ec,ec,1,&aij->B->cmap));
    PetscCall(PetscFree(indices));
#endif
  } else {
    garray = aij->garray;
  }

  if (!aij->lvec) {
    PetscCheck(aij->B,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing B mat");
    PetscCall(MatCreateVecs(aij->B,&aij->lvec,NULL));
  }
  PetscCall(VecGetSize(aij->lvec,&ec));

  /* create two temporary index sets used to build the scatter-gather context */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF,ec,garray,PETSC_COPY_VALUES,&from));
  PetscCall(ISCreateStride(PETSC_COMM_SELF,ec,0,1,&to));

  /* create a temporary global vector to generate the scatter context */
  /* this does not allocate the array's memory, so it is efficient */
  PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec));

  /* generate the scatter context */
  PetscCall(VecScatterDestroy(&aij->Mvctx));
  PetscCall(VecScatterCreate(gvec,from,aij->lvec,to,&aij->Mvctx));
  PetscCall(VecScatterViewFromOptions(aij->Mvctx,(PetscObject)mat,"-matmult_vecscatter_view"));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)aij->Mvctx));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)aij->lvec));
  PetscCall(PetscLogObjectMemory((PetscObject)mat,ec*sizeof(PetscInt)));
  aij->garray = garray;

  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)from));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)to));

  PetscCall(ISDestroy(&from));
  PetscCall(ISDestroy(&to));
  PetscCall(VecDestroy(&gvec));
  PetscFunctionReturn(0);
}
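/*
   How the pieces built above fit together in the parallel multiply (a rough sketch, not the
   actual implementation; see MatMult_MPIAIJ() in mpiaij.c for the real code):

     VecScatterBegin(aij->Mvctx,x,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);  // start gathering ghost entries of x
     MatMult(aij->A,x,y);                                                    // diagonal block, purely local work
     VecScatterEnd(aij->Mvctx,x,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);    // finish the gather
     MatMultAdd(aij->B,aij->lvec,y,y);                                       // off-diagonal block uses the compacted columns

   The scatter uses garray as the list of global column indices this rank needs from other
   ranks, and lvec is the local buffer those values land in.
*/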
/* Disassemble the off-diagonal portion of the MPIAIJxxx matrix.
   In other words, change B from the reduced format that uses local column ids
   to the expanded format that uses global column ids; this makes it easier to
   insert new nonzeros (with global column ids) into the matrix.
   The off-diagonal B determines the communication in the matrix-vector multiply.
*/
PetscErrorCode MatDisAssemble_MPIAIJ(Mat A)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)A->data;
  Mat               B    = aij->B,Bnew;
  Mat_SeqAIJ        *Baij = (Mat_SeqAIJ*)B->data;
  PetscInt          i,j,m = B->rmap->n,n = A->cmap->N,col,ct = 0,*garray = aij->garray,*nz,ec;
  PetscScalar       v;
  const PetscScalar *ba;

  PetscFunctionBegin;
  /* free stuff related to matrix-vector multiply */
  PetscCall(VecGetSize(aij->lvec,&ec)); /* needed for PetscLogObjectMemory below */
  PetscCall(VecDestroy(&aij->lvec));
  if (aij->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscTableDestroy(&aij->colmap));
#else
    PetscCall(PetscFree(aij->colmap));
    PetscCall(PetscLogObjectMemory((PetscObject)A,-aij->B->cmap->n*sizeof(PetscInt)));
#endif
  }

  /* make sure that B is assembled so we can access its values */
  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));

  /* invent new B and copy stuff over */
  PetscCall(PetscMalloc1(m+1,&nz));
  for (i=0; i<m; i++) {
    nz[i] = Baij->i[i+1] - Baij->i[i];
  }
  PetscCall(MatCreate(PETSC_COMM_SELF,&Bnew));
  PetscCall(MatSetSizes(Bnew,m,n,m,n)); /* Bnew now uses A->cmap->N as its column size */
  PetscCall(MatSetBlockSizesFromMats(Bnew,A,A));
  PetscCall(MatSetType(Bnew,((PetscObject)B)->type_name));
  PetscCall(MatSeqAIJSetPreallocation(Bnew,0,nz));

  if (Baij->nonew >= 0) { /* Inherit insertion error options (if nonnegative) */
    ((Mat_SeqAIJ*)Bnew->data)->nonew = Baij->nonew;
  }

  /*
     Ensure that B's nonzerostate is monotonically increasing.
     Or should this follow the MatSetValues() loop to preserve B's nonzerostate across a MatDisAssemble() call?
  */
  Bnew->nonzerostate = B->nonzerostate;

  PetscCall(PetscFree(nz));
  PetscCall(MatSeqAIJGetArrayRead(B,&ba));
  for (i=0; i<m; i++) {
    for (j=Baij->i[i]; j<Baij->i[i+1]; j++) {
      col = garray[Baij->j[ct]];
      v   = ba[ct++];
      PetscCall(MatSetValues(Bnew,1,&i,1,&col,&v,B->insertmode));
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B,&ba));

  PetscCall(PetscFree(aij->garray));
  PetscCall(PetscLogObjectMemory((PetscObject)A,-ec*sizeof(PetscInt)));
  PetscCall(MatDestroy(&B));
  PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)Bnew));

  aij->B           = Bnew;
  A->was_assembled = PETSC_FALSE;
  PetscFunctionReturn(0);
}
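/*
   Illustration of the disassembly (hypothetical numbers, not taken from any particular run):
   suppose garray = {3,7,12}.  Before MatDisAssemble_MPIAIJ() a row of B might store the
   compact local column indices {0,2}, which stand for global columns 3 and 12.  After
   disassembly the new B has column size A->cmap->N and stores {3,12} directly, so later
   MatSetValues() calls with global column ids need no translation.
*/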
/* ugly stuff added for Glenn; someday we should fix this up */

static PetscInt *auglyrmapd = NULL,*auglyrmapo = NULL; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */
static Vec      auglydd     = NULL,auglyoo     = NULL; /* work vectors used to scale the two parts of the local matrix */

PetscErrorCode MatMPIAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale)
{
  Mat_MPIAIJ *ina = (Mat_MPIAIJ*)inA->data; /* access private part of matrix */
  PetscInt   i,n,nt,cstart,cend,no,*garray = ina->garray,*lindices;
  PetscInt   *r_rmapd,*r_rmapo;

  PetscFunctionBegin;
  PetscCall(MatGetOwnershipRange(inA,&cstart,&cend));
  PetscCall(MatGetSize(ina->A,NULL,&n));
  PetscCall(PetscCalloc1(inA->rmap->mapping->n+1,&r_rmapd));
  nt = 0;
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (inA->rmap->mapping->indices[i] >= cstart && inA->rmap->mapping->indices[i] < cend) {
      nt++;
      r_rmapd[i] = inA->rmap->mapping->indices[i] + 1;
    }
  }
  PetscCheckFalse(nt != n,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %" PetscInt_FMT " n %" PetscInt_FMT,nt,n);
  PetscCall(PetscMalloc1(n+1,&auglyrmapd));
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (r_rmapd[i]) {
      auglyrmapd[(r_rmapd[i]-1)-cstart] = i;
    }
  }
  PetscCall(PetscFree(r_rmapd));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF,n,&auglydd));

  PetscCall(PetscCalloc1(inA->cmap->N+1,&lindices));
  for (i=0; i<ina->B->cmap->n; i++) {
    lindices[garray[i]] = i+1;
  }
  no = inA->rmap->mapping->n - nt;
  PetscCall(PetscCalloc1(inA->rmap->mapping->n+1,&r_rmapo));
  nt = 0;
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (lindices[inA->rmap->mapping->indices[i]]) {
      nt++;
      r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]];
    }
  }
  PetscCheckFalse(nt > no,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %" PetscInt_FMT " no %" PetscInt_FMT,nt,no);
  PetscCall(PetscFree(lindices));
  PetscCall(PetscMalloc1(nt+1,&auglyrmapo));
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (r_rmapo[i]) {
      auglyrmapo[(r_rmapo[i]-1)] = i;
    }
  }
  PetscCall(PetscFree(r_rmapo));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF,nt,&auglyoo));
  PetscFunctionReturn(0);
}
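/*
   Note: auglyrmapd[k] gives the position, in the local (ghosted) ordering of the
   ISLocalToGlobalMapping, of the owned column with global index cstart+k, and
   auglyrmapo[k] gives the position of the k-th ghost column (in garray order).
   Because these are file-scope statics they are shared by every matrix in the
   program and are built only once (on the first call below); this is part of the
   "ugly stuff" noted above.
*/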
PetscErrorCode MatMPIAIJDiagonalScaleLocal(Mat A,Vec scale)
{
  /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal() */

  PetscFunctionBegin;
  PetscTryMethod(A,"MatDiagonalScaleLocal_C",(Mat,Vec),(A,scale));
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalScaleLocal_MPIAIJ(Mat A,Vec scale)
{
  Mat_MPIAIJ        *a = (Mat_MPIAIJ*)A->data; /* access private part of matrix */
  PetscInt          n,i;
  PetscScalar       *d,*o;
  const PetscScalar *s;

  PetscFunctionBegin;
  if (!auglyrmapd) {
    PetscCall(MatMPIAIJDiagonalScaleLocalSetUp(A,scale));
  }

  PetscCall(VecGetArrayRead(scale,&s));

  PetscCall(VecGetLocalSize(auglydd,&n));
  PetscCall(VecGetArray(auglydd,&d));
  for (i=0; i<n; i++) {
    d[i] = s[auglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */
  }
  PetscCall(VecRestoreArray(auglydd,&d));
  /* column scale "diagonal" portion of local matrix */
  PetscCall(MatDiagonalScale(a->A,NULL,auglydd));

  PetscCall(VecGetLocalSize(auglyoo,&n));
  PetscCall(VecGetArray(auglyoo,&o));
  for (i=0; i<n; i++) {
    o[i] = s[auglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
  }
  PetscCall(VecRestoreArrayRead(scale,&s));
  PetscCall(VecRestoreArray(auglyoo,&o));
  /* column scale "off-diagonal" portion of local matrix */
  PetscCall(MatDiagonalScale(a->B,NULL,auglyoo));
  PetscFunctionReturn(0);
}
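/*
   Rough usage sketch (illustrative only, not taken from a PETSc example).  It assumes the
   matrix already has a local-to-global mapping (e.g. set with MatSetLocalToGlobalMapping())
   and that `scale` is a sequential vector in the local (ghosted) ordering, of length
   A->rmap->mapping->n:

     Vec scale;
     PetscCall(VecCreateSeq(PETSC_COMM_SELF,nlocal,&scale));   // nlocal = number of local (ghosted) entries; hypothetical variable
     // ... fill scale with the per-column factors ...
     PetscCall(MatMPIAIJDiagonalScaleLocal(A,scale));          // right-scales both the diagonal and off-diagonal blocks
     PetscCall(VecDestroy(&scale));
*/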