/*
   Support for the parallel AIJ matrix-vector multiply
*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petsc/private/vecimpl.h>
#include <petsc/private/isimpl.h> /* needed because it accesses the data structure of ISLocalToGlobalMapping directly */

PetscErrorCode MatSetUpMultiply_MPIAIJ(Mat mat) {
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ *B   = (Mat_SeqAIJ *)(aij->B->data);
  PetscInt    i, j, *aj = B->j, *garray;
  PetscInt    ec = 0; /* number of nonzero external columns */
  IS          from, to;
  Vec         gvec;
#if defined(PETSC_USE_CTABLE)
  PetscTable         gid1_lid1;
  PetscTablePosition tpos;
  PetscInt           gid, lid;
#else
  PetscInt N = mat->cmap->N, *indices;
#endif

  PetscFunctionBegin;
  if (!aij->garray) {
    PetscCheck(aij->B, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing B mat");
#if defined(PETSC_USE_CTABLE)
    /* use a table */
    PetscCall(PetscTableCreate(aij->B->rmap->n, mat->cmap->N + 1, &gid1_lid1));
    for (i = 0; i < aij->B->rmap->n; i++) {
      for (j = 0; j < B->ilen[i]; j++) {
        PetscInt data, gid1 = aj[B->i[i] + j] + 1;
        PetscCall(PetscTableFind(gid1_lid1, gid1, &data));
        if (!data) {
          /* one based table */
          PetscCall(PetscTableAdd(gid1_lid1, gid1, ++ec, INSERT_VALUES));
        }
      }
    }
    /* form array of columns we need */
    PetscCall(PetscMalloc1(ec, &garray));
    PetscCall(PetscTableGetHeadPosition(gid1_lid1, &tpos));
    while (tpos) {
      PetscCall(PetscTableGetNext(gid1_lid1, &tpos, &gid, &lid));
      gid--;
      lid--;
      garray[lid] = gid;
    }
    PetscCall(PetscSortInt(ec, garray)); /* sort, and rebuild */
    PetscCall(PetscTableRemoveAll(gid1_lid1));
    for (i = 0; i < ec; i++) PetscCall(PetscTableAdd(gid1_lid1, garray[i] + 1, i + 1, INSERT_VALUES));
    /* compact out the extra columns in B */
    for (i = 0; i < aij->B->rmap->n; i++) {
      for (j = 0; j < B->ilen[i]; j++) {
        PetscInt gid1 = aj[B->i[i] + j] + 1;
        PetscCall(PetscTableFind(gid1_lid1, gid1, &lid));
        lid--;
        aj[B->i[i] + j] = lid;
      }
    }
    PetscCall(PetscLayoutDestroy(&aij->B->cmap));
    PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)aij->B), ec, ec, 1, &aij->B->cmap));
    PetscCall(PetscTableDestroy(&gid1_lid1));
#else
    /* Make an array as long as the number of columns */
    /* mark those columns that are in aij->B */
    PetscCall(PetscCalloc1(N, &indices));
    for (i = 0; i < aij->B->rmap->n; i++) {
      for (j = 0; j < B->ilen[i]; j++) {
        if (!indices[aj[B->i[i] + j]]) ec++;
        indices[aj[B->i[i] + j]] = 1;
      }
    }

    /* form array of columns we need */
    PetscCall(PetscMalloc1(ec, &garray));
    ec = 0;
    for (i = 0; i < N; i++) {
      if (indices[i]) garray[ec++] = i;
    }

    /* make indices now point into garray */
    for (i = 0; i < ec; i++) indices[garray[i]] = i;

    /* compact out the extra columns in B */
    for (i = 0; i < aij->B->rmap->n; i++) {
      for (j = 0; j < B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
    }
    PetscCall(PetscLayoutDestroy(&aij->B->cmap));
    PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)aij->B), ec, ec, 1, &aij->B->cmap));
    PetscCall(PetscFree(indices));
#endif
  } else {
    garray = aij->garray;
  }

  if (!aij->lvec) {
    PetscCheck(aij->B, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing B mat");
    PetscCall(MatCreateVecs(aij->B, &aij->lvec, NULL));
  }
  PetscCall(VecGetSize(aij->lvec, &ec));

  /* create two temporary index sets used to build the scatter-gather */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ec, garray, PETSC_COPY_VALUES, &from));
  PetscCall(ISCreateStride(PETSC_COMM_SELF, ec, 0, 1, &to));

  /* create a temporary global vector to generate the scatter context */
  /* this does not allocate the array's memory so it is efficient */
  PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec));

  /* generate the scatter context */
  PetscCall(VecScatterDestroy(&aij->Mvctx));
  PetscCall(VecScatterCreate(gvec, from, aij->lvec, to, &aij->Mvctx));
  PetscCall(VecScatterViewFromOptions(aij->Mvctx, (PetscObject)mat, "-matmult_vecscatter_view"));
  PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)aij->Mvctx));
  PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)aij->lvec));
  PetscCall(PetscLogObjectMemory((PetscObject)mat, ec * sizeof(PetscInt)));
  aij->garray = garray;

  PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)from));
  PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)to));

  PetscCall(ISDestroy(&from));
  PetscCall(ISDestroy(&to));
  PetscCall(VecDestroy(&gvec));
  PetscFunctionReturn(0);
}
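
/*
   Minimal sketch (illustrative only, not compiled, and not the actual PETSc routine) of how
   the objects built above are used during a matrix-vector multiply: the scatter aij->Mvctx
   gathers the needed off-process entries of x into the local work vector aij->lvec while the
   "diagonal" block aij->A multiplies the locally owned part of x, then the "off-diagonal"
   block aij->B adds its contribution. The real implementation, MatMult_MPIAIJ() in mpiaij.c,
   additionally checks vector sizes; the function name below is hypothetical.
*/
#if 0
static PetscErrorCode MatMultSketch_MPIAIJ(Mat mat, Vec xx, Vec yy) {
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  /* start gathering ghost values of x into lvec while the local block multiplies */
  PetscCall(VecScatterBegin(aij->Mvctx, xx, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*aij->A->ops->mult)(aij->A, xx, yy));               /* y  = A_diag    * x_owned */
  PetscCall(VecScatterEnd(aij->Mvctx, xx, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*aij->B->ops->multadd)(aij->B, aij->lvec, yy, yy)); /* y += A_offdiag * x_ghost */
  PetscFunctionReturn(0);
}
#endif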

/* Disassemble the off-diagonal portion of the MPIAIJXxx matrix.
   In other words, change B from the reduced format using local col ids
   to the expanded format using global col ids, which makes it easier to
   insert new nonzeros (with global col ids) into the matrix.
   The off-diagonal B determines the communication in the matrix-vector multiply.
*/
PetscErrorCode MatDisAssemble_MPIAIJ(Mat A) {
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
  Mat                B   = aij->B, Bnew;
  Mat_SeqAIJ        *Baij = (Mat_SeqAIJ *)B->data;
  PetscInt           i, j, m = B->rmap->n, n = A->cmap->N, col, ct = 0, *garray = aij->garray, *nz, ec;
  PetscScalar        v;
  const PetscScalar *ba;

  PetscFunctionBegin;
  /* free stuff related to matrix-vector multiply */
  PetscCall(VecGetSize(aij->lvec, &ec)); /* needed for PetscLogObjectMemory below */
  PetscCall(VecDestroy(&aij->lvec));
  if (aij->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscTableDestroy(&aij->colmap));
#else
    PetscCall(PetscFree(aij->colmap));
    PetscCall(PetscLogObjectMemory((PetscObject)A, -aij->B->cmap->n * sizeof(PetscInt)));
#endif
  }

  /* make sure that B is assembled so we can access its values */
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  /* invent new B and copy stuff over */
  PetscCall(PetscMalloc1(m + 1, &nz));
  for (i = 0; i < m; i++) nz[i] = Baij->i[i + 1] - Baij->i[i];
  PetscCall(MatCreate(PETSC_COMM_SELF, &Bnew));
  PetscCall(MatSetSizes(Bnew, m, n, m, n)); /* Bnew now uses A->cmap->N as its column size */
  PetscCall(MatSetBlockSizesFromMats(Bnew, A, A));
  PetscCall(MatSetType(Bnew, ((PetscObject)B)->type_name));
  PetscCall(MatSeqAIJSetPreallocation(Bnew, 0, nz));

  if (Baij->nonew >= 0) { /* Inherit insertion error options (if positive). */
    ((Mat_SeqAIJ *)Bnew->data)->nonew = Baij->nonew;
  }

  /*
    Ensure that B's nonzerostate is monotonically increasing.
    Or should this follow the MatSetValues() loop to preserve B's nonzerostate across a MatDisAssemble() call?
  */
  Bnew->nonzerostate = B->nonzerostate;

  PetscCall(PetscFree(nz));
  PetscCall(MatSeqAIJGetArrayRead(B, &ba));
  for (i = 0; i < m; i++) {
    for (j = Baij->i[i]; j < Baij->i[i + 1]; j++) {
      col = garray[Baij->j[ct]];
      v   = ba[ct++];
      PetscCall(MatSetValues(Bnew, 1, &i, 1, &col, &v, B->insertmode));
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &ba));

  PetscCall(PetscFree(aij->garray));
  PetscCall(PetscLogObjectMemory((PetscObject)A, -ec * sizeof(PetscInt)));
  PetscCall(MatDestroy(&B));
  PetscCall(PetscLogObjectParent((PetscObject)A, (PetscObject)Bnew));

  aij->B           = Bnew;
  A->was_assembled = PETSC_FALSE;
  PetscFunctionReturn(0);
}
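
/*
   Hedged illustration (not compiled, helper names are hypothetical) of the two column-index
   translations that surround MatDisAssemble_MPIAIJ(): aij->garray maps B's compact local
   column ids to global column ids, while aij->colmap (rebuilt later, e.g. by
   MatCreateColmap_MPIAIJ_Private() in mpiaij.c) provides the reverse global-to-local lookup
   used when inserting values with global column ids.
*/
#if 0
static PetscInt ColLocalToGlobalSketch(Mat_MPIAIJ *aij, PetscInt lcol) {
  return aij->garray[lcol]; /* compact local column id of B -> global column id */
}

static PetscErrorCode ColGlobalToLocalSketch(Mat_MPIAIJ *aij, PetscInt gcol, PetscInt *lcol) {
  PetscFunctionBegin;
  #if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableFind(aij->colmap, gcol + 1, lcol)); /* colmap stores one-based ids; 0 means "not present" */
  (*lcol)--;
  #else
  *lcol = aij->colmap[gcol] - 1;
  #endif
  PetscFunctionReturn(0);
}
#endif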

/* ugly stuff added for Glenn; someday we should fix this up */

static PetscInt *auglyrmapd = NULL, *auglyrmapo = NULL; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */
static Vec       auglydd = NULL, auglyoo = NULL;        /* work vectors used to scale the two parts of the local matrix */

PetscErrorCode MatMPIAIJDiagonalScaleLocalSetUp(Mat inA, Vec scale) {
  Mat_MPIAIJ *ina = (Mat_MPIAIJ *)inA->data; /* access private part of matrix */
  PetscInt    i, n, nt, cstart, cend, no, *garray = ina->garray, *lindices;
  PetscInt   *r_rmapd, *r_rmapo;

  PetscFunctionBegin;
  PetscCall(MatGetOwnershipRange(inA, &cstart, &cend));
  PetscCall(MatGetSize(ina->A, NULL, &n));
  PetscCall(PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapd));
  nt = 0;
  for (i = 0; i < inA->rmap->mapping->n; i++) {
    if (inA->rmap->mapping->indices[i] >= cstart && inA->rmap->mapping->indices[i] < cend) {
      nt++;
      r_rmapd[i] = inA->rmap->mapping->indices[i] + 1;
    }
  }
  PetscCheck(nt == n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt %" PetscInt_FMT " n %" PetscInt_FMT, nt, n);
  PetscCall(PetscMalloc1(n + 1, &auglyrmapd));
  for (i = 0; i < inA->rmap->mapping->n; i++) {
    if (r_rmapd[i]) auglyrmapd[(r_rmapd[i] - 1) - cstart] = i;
  }
  PetscCall(PetscFree(r_rmapd));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, n, &auglydd));

  PetscCall(PetscCalloc1(inA->cmap->N + 1, &lindices));
  for (i = 0; i < ina->B->cmap->n; i++) lindices[garray[i]] = i + 1;
  no = inA->rmap->mapping->n - nt;
  PetscCall(PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapo));
  nt = 0;
  for (i = 0; i < inA->rmap->mapping->n; i++) {
    if (lindices[inA->rmap->mapping->indices[i]]) {
      nt++;
      r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]];
    }
  }
  PetscCheck(nt <= no, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt %" PetscInt_FMT " no %" PetscInt_FMT, nt, no);
  PetscCall(PetscFree(lindices));
  PetscCall(PetscMalloc1(nt + 1, &auglyrmapo));
  for (i = 0; i < inA->rmap->mapping->n; i++) {
    if (r_rmapo[i]) auglyrmapo[r_rmapo[i] - 1] = i;
  }
  PetscCall(PetscFree(r_rmapo));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, nt, &auglyoo));
  PetscFunctionReturn(0);
}
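
/*
   What the setup above produces: auglyrmapd[k] is the position, in the matrix's local
   (ghosted) row ordering, of the locally owned global column cstart + k, and auglyrmapo[k]
   is the position in that same ordering of the ghost column garray[k]. For example, with
   cstart = 10 and garray = {3, 42}, auglyrmapd[2] locates global column 12 in the local
   ordering and auglyrmapo[1] locates global column 42. MatDiagonalScaleLocal_MPIAIJ() below
   uses these maps to split a single locally ordered scale vector into auglydd and auglyoo.
*/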

PetscErrorCode MatMPIAIJDiagonalScaleLocal(Mat A, Vec scale) {
  /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */

  PetscFunctionBegin;
  PetscTryMethod(A, "MatDiagonalScaleLocal_C", (Mat, Vec), (A, scale));
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalScaleLocal_MPIAIJ(Mat A, Vec scale) {
  Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data; /* access private part of matrix */
  PetscInt           n, i;
  PetscScalar       *d, *o;
  const PetscScalar *s;

  PetscFunctionBegin;
  if (!auglyrmapd) PetscCall(MatMPIAIJDiagonalScaleLocalSetUp(A, scale));

  PetscCall(VecGetArrayRead(scale, &s));

  PetscCall(VecGetLocalSize(auglydd, &n));
  PetscCall(VecGetArray(auglydd, &d));
  for (i = 0; i < n; i++) d[i] = s[auglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into the dd vector */
  PetscCall(VecRestoreArray(auglydd, &d));
  /* column scale "diagonal" portion of local matrix */
  PetscCall(MatDiagonalScale(a->A, NULL, auglydd));

  PetscCall(VecGetLocalSize(auglyoo, &n));
  PetscCall(VecGetArray(auglyoo, &o));
  for (i = 0; i < n; i++) o[i] = s[auglyrmapo[i]]; /* copy "off-diagonal" portion of scale into the oo vector */
  PetscCall(VecRestoreArrayRead(scale, &s));
  PetscCall(VecRestoreArray(auglyoo, &o));
  /* column scale "off-diagonal" portion of local matrix */
  PetscCall(MatDiagonalScale(a->B, NULL, auglyoo));
  PetscFunctionReturn(0);
}
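
/*
   Hedged usage sketch (illustrative only, not compiled; the function name is hypothetical):
   scaling the columns of an MPIAIJ matrix by values given in the matrix's local (ghosted)
   ordering. It assumes the matrix already has a local-to-global mapping attached, e.g. via
   MatSetLocalToGlobalMapping(), whose size determines the length of the scale vector.
*/
#if 0
static PetscErrorCode ScaleColumnsInLocalOrdering(Mat A) {
  ISLocalToGlobalMapping rmap;
  PetscInt               nlocal;
  Vec                    localScale;

  PetscFunctionBegin;
  PetscCall(MatGetLocalToGlobalMapping(A, &rmap, NULL));
  PetscCall(ISLocalToGlobalMappingGetSize(rmap, &nlocal));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, nlocal, &localScale));
  PetscCall(VecSet(localScale, 2.0)); /* example: scale every column by 2 */
  PetscCall(MatMPIAIJDiagonalScaleLocal(A, localScale));
  PetscCall(VecDestroy(&localScale));
  PetscFunctionReturn(0);
}
#endif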