1 2 /* 3 Support for the parallel AIJ matrix vector multiply 4 */ 5 #include <../src/mat/impls/aij/mpi/mpiaij.h> 6 #include <petsc/private/vecimpl.h> 7 #include <petsc/private/isimpl.h> /* needed because accesses data structure of ISLocalToGlobalMapping directly */ 8 9 PetscErrorCode MatSetUpMultiply_MPIAIJ(Mat mat) 10 { 11 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 12 Mat_SeqAIJ *B = (Mat_SeqAIJ*)(aij->B->data); 13 PetscInt i,j,*aj = B->j,*garray; 14 PetscInt ec = 0; /* Number of nonzero external columns */ 15 IS from,to; 16 Vec gvec; 17 #if defined(PETSC_USE_CTABLE) 18 PetscTable gid1_lid1; 19 PetscTablePosition tpos; 20 PetscInt gid,lid; 21 #else 22 PetscInt N = mat->cmap->N,*indices; 23 #endif 24 25 PetscFunctionBegin; 26 if (!aij->garray) { 27 PetscCheck(aij->B,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing B mat"); 28 #if defined(PETSC_USE_CTABLE) 29 /* use a table */ 30 CHKERRQ(PetscTableCreate(aij->B->rmap->n,mat->cmap->N+1,&gid1_lid1)); 31 for (i=0; i<aij->B->rmap->n; i++) { 32 for (j=0; j<B->ilen[i]; j++) { 33 PetscInt data,gid1 = aj[B->i[i] + j] + 1; 34 CHKERRQ(PetscTableFind(gid1_lid1,gid1,&data)); 35 if (!data) { 36 /* one based table */ 37 CHKERRQ(PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES)); 38 } 39 } 40 } 41 /* form array of columns we need */ 42 CHKERRQ(PetscMalloc1(ec,&garray)); 43 CHKERRQ(PetscTableGetHeadPosition(gid1_lid1,&tpos)); 44 while (tpos) { 45 CHKERRQ(PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid)); 46 gid--; 47 lid--; 48 garray[lid] = gid; 49 } 50 CHKERRQ(PetscSortInt(ec,garray)); /* sort, and rebuild */ 51 CHKERRQ(PetscTableRemoveAll(gid1_lid1)); 52 for (i=0; i<ec; i++) { 53 CHKERRQ(PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES)); 54 } 55 /* compact out the extra columns in B */ 56 for (i=0; i<aij->B->rmap->n; i++) { 57 for (j=0; j<B->ilen[i]; j++) { 58 PetscInt gid1 = aj[B->i[i] + j] + 1; 59 CHKERRQ(PetscTableFind(gid1_lid1,gid1,&lid)); 60 lid--; 61 aj[B->i[i] + j] = lid; 62 } 63 } 64 CHKERRQ(PetscLayoutDestroy(&aij->B->cmap)); 65 CHKERRQ(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)aij->B),ec,ec,1,&aij->B->cmap)); 66 CHKERRQ(PetscTableDestroy(&gid1_lid1)); 67 #else 68 /* Make an array as long as the number of columns */ 69 /* mark those columns that are in aij->B */ 70 CHKERRQ(PetscCalloc1(N,&indices)); 71 for (i=0; i<aij->B->rmap->n; i++) { 72 for (j=0; j<B->ilen[i]; j++) { 73 if (!indices[aj[B->i[i] + j]]) ec++; 74 indices[aj[B->i[i] + j]] = 1; 75 } 76 } 77 78 /* form array of columns we need */ 79 CHKERRQ(PetscMalloc1(ec,&garray)); 80 ec = 0; 81 for (i=0; i<N; i++) { 82 if (indices[i]) garray[ec++] = i; 83 } 84 85 /* make indices now point into garray */ 86 for (i=0; i<ec; i++) { 87 indices[garray[i]] = i; 88 } 89 90 /* compact out the extra columns in B */ 91 for (i=0; i<aij->B->rmap->n; i++) { 92 for (j=0; j<B->ilen[i]; j++) { 93 aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 94 } 95 } 96 CHKERRQ(PetscLayoutDestroy(&aij->B->cmap)); 97 CHKERRQ(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)aij->B),ec,ec,1,&aij->B->cmap)); 98 CHKERRQ(PetscFree(indices)); 99 #endif 100 } else { 101 garray = aij->garray; 102 } 103 104 if (!aij->lvec) { 105 PetscCheck(aij->B,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing B mat"); 106 CHKERRQ(MatCreateVecs(aij->B,&aij->lvec,NULL)); 107 } 108 CHKERRQ(VecGetSize(aij->lvec,&ec)); 109 110 /* create two temporary Index sets for build scatter gather */ 111 CHKERRQ(ISCreateGeneral(PETSC_COMM_SELF,ec,garray,PETSC_COPY_VALUES,&from)); 112 CHKERRQ(ISCreateStride(PETSC_COMM_SELF,ec,0,1,&to)); 113 114 /* create temporary global vector to generate scatter context */ 115 /* This does not allocate the array's memory so is efficient */ 116 CHKERRQ(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec)); 117 118 /* generate the scatter context */ 119 CHKERRQ(VecScatterDestroy(&aij->Mvctx)); 120 CHKERRQ(VecScatterCreate(gvec,from,aij->lvec,to,&aij->Mvctx)); 121 CHKERRQ(VecScatterViewFromOptions(aij->Mvctx,(PetscObject)mat,"-matmult_vecscatter_view")); 122 CHKERRQ(PetscLogObjectParent((PetscObject)mat,(PetscObject)aij->Mvctx)); 123 CHKERRQ(PetscLogObjectParent((PetscObject)mat,(PetscObject)aij->lvec)); 124 CHKERRQ(PetscLogObjectMemory((PetscObject)mat,ec*sizeof(PetscInt))); 125 aij->garray = garray; 126 127 CHKERRQ(PetscLogObjectParent((PetscObject)mat,(PetscObject)from)); 128 CHKERRQ(PetscLogObjectParent((PetscObject)mat,(PetscObject)to)); 129 130 CHKERRQ(ISDestroy(&from)); 131 CHKERRQ(ISDestroy(&to)); 132 CHKERRQ(VecDestroy(&gvec)); 133 PetscFunctionReturn(0); 134 } 135 136 /* Disassemble the off-diag portion of the MPIAIJXxx matrix. 137 In other words, change the B from reduced format using local col ids 138 to expanded format using global col ids, which will make it easier to 139 insert new nonzeros (with global col ids) into the matrix. 140 The off-diag B determines communication in the matrix vector multiply. 141 */ 142 PetscErrorCode MatDisAssemble_MPIAIJ(Mat A) 143 { 144 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 145 Mat B = aij->B,Bnew; 146 Mat_SeqAIJ *Baij = (Mat_SeqAIJ*)B->data; 147 PetscInt i,j,m = B->rmap->n,n = A->cmap->N,col,ct = 0,*garray = aij->garray,*nz,ec; 148 PetscScalar v; 149 const PetscScalar *ba; 150 151 PetscFunctionBegin; 152 /* free stuff related to matrix-vec multiply */ 153 CHKERRQ(VecGetSize(aij->lvec,&ec)); /* needed for PetscLogObjectMemory below */ 154 CHKERRQ(VecDestroy(&aij->lvec)); 155 if (aij->colmap) { 156 #if defined(PETSC_USE_CTABLE) 157 CHKERRQ(PetscTableDestroy(&aij->colmap)); 158 #else 159 CHKERRQ(PetscFree(aij->colmap)); 160 CHKERRQ(PetscLogObjectMemory((PetscObject)A,-aij->B->cmap->n*sizeof(PetscInt))); 161 #endif 162 } 163 164 /* make sure that B is assembled so we can access its values */ 165 CHKERRQ(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 166 CHKERRQ(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 167 168 /* invent new B and copy stuff over */ 169 CHKERRQ(PetscMalloc1(m+1,&nz)); 170 for (i=0; i<m; i++) { 171 nz[i] = Baij->i[i+1] - Baij->i[i]; 172 } 173 CHKERRQ(MatCreate(PETSC_COMM_SELF,&Bnew)); 174 CHKERRQ(MatSetSizes(Bnew,m,n,m,n)); /* Bnew now uses A->cmap->N as its col size */ 175 CHKERRQ(MatSetBlockSizesFromMats(Bnew,A,A)); 176 CHKERRQ(MatSetType(Bnew,((PetscObject)B)->type_name)); 177 CHKERRQ(MatSeqAIJSetPreallocation(Bnew,0,nz)); 178 179 if (Baij->nonew >= 0) { /* Inherit insertion error options (if positive). */ 180 ((Mat_SeqAIJ*)Bnew->data)->nonew = Baij->nonew; 181 } 182 183 /* 184 Ensure that B's nonzerostate is monotonically increasing. 185 Or should this follow the MatSetValues() loop to preserve B's nonzerstate across a MatDisAssemble() call? 186 */ 187 Bnew->nonzerostate = B->nonzerostate; 188 189 CHKERRQ(PetscFree(nz)); 190 CHKERRQ(MatSeqAIJGetArrayRead(B,&ba)); 191 for (i=0; i<m; i++) { 192 for (j=Baij->i[i]; j<Baij->i[i+1]; j++) { 193 col = garray[Baij->j[ct]]; 194 v = ba[ct++]; 195 CHKERRQ(MatSetValues(Bnew,1,&i,1,&col,&v,B->insertmode)); 196 } 197 } 198 CHKERRQ(MatSeqAIJRestoreArrayRead(B,&ba)); 199 200 CHKERRQ(PetscFree(aij->garray)); 201 CHKERRQ(PetscLogObjectMemory((PetscObject)A,-ec*sizeof(PetscInt))); 202 CHKERRQ(MatDestroy(&B)); 203 CHKERRQ(PetscLogObjectParent((PetscObject)A,(PetscObject)Bnew)); 204 205 aij->B = Bnew; 206 A->was_assembled = PETSC_FALSE; 207 PetscFunctionReturn(0); 208 } 209 210 /* ugly stuff added for Glenn someday we should fix this up */ 211 212 static PetscInt *auglyrmapd = NULL,*auglyrmapo = NULL; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */ 213 static Vec auglydd = NULL,auglyoo = NULL; /* work vectors used to scale the two parts of the local matrix */ 214 215 PetscErrorCode MatMPIAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale) 216 { 217 Mat_MPIAIJ *ina = (Mat_MPIAIJ*) inA->data; /*access private part of matrix */ 218 PetscInt i,n,nt,cstart,cend,no,*garray = ina->garray,*lindices; 219 PetscInt *r_rmapd,*r_rmapo; 220 221 PetscFunctionBegin; 222 CHKERRQ(MatGetOwnershipRange(inA,&cstart,&cend)); 223 CHKERRQ(MatGetSize(ina->A,NULL,&n)); 224 CHKERRQ(PetscCalloc1(inA->rmap->mapping->n+1,&r_rmapd)); 225 nt = 0; 226 for (i=0; i<inA->rmap->mapping->n; i++) { 227 if (inA->rmap->mapping->indices[i] >= cstart && inA->rmap->mapping->indices[i] < cend) { 228 nt++; 229 r_rmapd[i] = inA->rmap->mapping->indices[i] + 1; 230 } 231 } 232 PetscCheckFalse(nt != n,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %" PetscInt_FMT " n %" PetscInt_FMT,nt,n); 233 CHKERRQ(PetscMalloc1(n+1,&auglyrmapd)); 234 for (i=0; i<inA->rmap->mapping->n; i++) { 235 if (r_rmapd[i]) { 236 auglyrmapd[(r_rmapd[i]-1)-cstart] = i; 237 } 238 } 239 CHKERRQ(PetscFree(r_rmapd)); 240 CHKERRQ(VecCreateSeq(PETSC_COMM_SELF,n,&auglydd)); 241 242 CHKERRQ(PetscCalloc1(inA->cmap->N+1,&lindices)); 243 for (i=0; i<ina->B->cmap->n; i++) { 244 lindices[garray[i]] = i+1; 245 } 246 no = inA->rmap->mapping->n - nt; 247 CHKERRQ(PetscCalloc1(inA->rmap->mapping->n+1,&r_rmapo)); 248 nt = 0; 249 for (i=0; i<inA->rmap->mapping->n; i++) { 250 if (lindices[inA->rmap->mapping->indices[i]]) { 251 nt++; 252 r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]]; 253 } 254 } 255 PetscCheckFalse(nt > no,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %" PetscInt_FMT " no %" PetscInt_FMT,nt,n); 256 CHKERRQ(PetscFree(lindices)); 257 CHKERRQ(PetscMalloc1(nt+1,&auglyrmapo)); 258 for (i=0; i<inA->rmap->mapping->n; i++) { 259 if (r_rmapo[i]) { 260 auglyrmapo[(r_rmapo[i]-1)] = i; 261 } 262 } 263 CHKERRQ(PetscFree(r_rmapo)); 264 CHKERRQ(VecCreateSeq(PETSC_COMM_SELF,nt,&auglyoo)); 265 PetscFunctionReturn(0); 266 } 267 268 PetscErrorCode MatMPIAIJDiagonalScaleLocal(Mat A,Vec scale) 269 { 270 /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */ 271 272 PetscFunctionBegin; 273 CHKERRQ(PetscTryMethod(A,"MatDiagonalScaleLocal_C",(Mat,Vec),(A,scale))); 274 PetscFunctionReturn(0); 275 } 276 277 PetscErrorCode MatDiagonalScaleLocal_MPIAIJ(Mat A,Vec scale) 278 { 279 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; /*access private part of matrix */ 280 PetscInt n,i; 281 PetscScalar *d,*o; 282 const PetscScalar *s; 283 284 PetscFunctionBegin; 285 if (!auglyrmapd) { 286 CHKERRQ(MatMPIAIJDiagonalScaleLocalSetUp(A,scale)); 287 } 288 289 CHKERRQ(VecGetArrayRead(scale,&s)); 290 291 CHKERRQ(VecGetLocalSize(auglydd,&n)); 292 CHKERRQ(VecGetArray(auglydd,&d)); 293 for (i=0; i<n; i++) { 294 d[i] = s[auglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ 295 } 296 CHKERRQ(VecRestoreArray(auglydd,&d)); 297 /* column scale "diagonal" portion of local matrix */ 298 CHKERRQ(MatDiagonalScale(a->A,NULL,auglydd)); 299 300 CHKERRQ(VecGetLocalSize(auglyoo,&n)); 301 CHKERRQ(VecGetArray(auglyoo,&o)); 302 for (i=0; i<n; i++) { 303 o[i] = s[auglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ 304 } 305 CHKERRQ(VecRestoreArrayRead(scale,&s)); 306 CHKERRQ(VecRestoreArray(auglyoo,&o)); 307 /* column scale "off-diagonal" portion of local matrix */ 308 CHKERRQ(MatDiagonalScale(a->B,NULL,auglyoo)); 309 PetscFunctionReturn(0); 310 } 311