/*
   Support for the parallel AIJ matrix-vector multiply
*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petsc/private/vecimpl.h>
#include <petsc/private/isimpl.h> /* needed because accesses data structure of ISLocalToGlobalMapping directly */

PetscErrorCode MatSetUpMultiply_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)(aij->B->data);
  PetscErrorCode ierr;
  PetscInt       i,j,*aj = B->j,*garray;
  PetscInt       ec = 0; /* number of nonzero external columns */
  IS             from,to;
  Vec            gvec;
#if defined(PETSC_USE_CTABLE)
  PetscTable         gid1_lid1;
  PetscTablePosition tpos;
  PetscInt           gid,lid;
#else
  PetscInt N = mat->cmap->N,*indices;
#endif

  PetscFunctionBegin;
  if (!aij->garray) {
    if (!aij->B) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing B mat");
#if defined(PETSC_USE_CTABLE)
    /* use a table */
    ierr = PetscTableCreate(aij->B->rmap->n,mat->cmap->N+1,&gid1_lid1);CHKERRQ(ierr);
    for (i=0; i<aij->B->rmap->n; i++) {
      for (j=0; j<B->ilen[i]; j++) {
        PetscInt data,gid1 = aj[B->i[i] + j] + 1;
        ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
        if (!data) {
          /* one based table */
          ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr);
        }
      }
    }
    /* form array of columns we need */
    ierr = PetscMalloc1(ec,&garray);CHKERRQ(ierr);
    ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
    while (tpos) {
      ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
      gid--;
      lid--;
      garray[lid] = gid;
    }
    ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); /* sort, and rebuild */
    ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
    for (i=0; i<ec; i++) {
      ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
    }
    /* compact out the extra columns in B */
    for (i=0; i<aij->B->rmap->n; i++) {
      for (j=0; j<B->ilen[i]; j++) {
        PetscInt gid1 = aj[B->i[i] + j] + 1;
        ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
        lid--;
        aj[B->i[i] + j] = lid;
      }
    }
    ierr = PetscLayoutDestroy(&aij->B->cmap);CHKERRQ(ierr);
    ierr = PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)aij->B),ec,ec,1,&aij->B->cmap);CHKERRQ(ierr);
    ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr);
#else
    /* Make an array as long as the number of columns */
    /* mark those columns that are in aij->B */
    ierr = PetscCalloc1(N,&indices);CHKERRQ(ierr);
    for (i=0; i<aij->B->rmap->n; i++) {
      for (j=0; j<B->ilen[i]; j++) {
        if (!indices[aj[B->i[i] + j]]) ec++;
        indices[aj[B->i[i] + j]] = 1;
      }
    }

    /* form array of columns we need */
    ierr = PetscMalloc1(ec,&garray);CHKERRQ(ierr);
    ec   = 0;
    for (i=0; i<N; i++) {
      if (indices[i]) garray[ec++] = i;
    }

    /* make indices now point into garray */
    for (i=0; i<ec; i++) {
      indices[garray[i]] = i;
    }

    /* compact out the extra columns in B */
    for (i=0; i<aij->B->rmap->n; i++) {
      for (j=0; j<B->ilen[i]; j++) {
        aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
      }
    }
    ierr = PetscLayoutDestroy(&aij->B->cmap);CHKERRQ(ierr);
    ierr = PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)aij->B),ec,ec,1,&aij->B->cmap);CHKERRQ(ierr);
    ierr = PetscFree(indices);CHKERRQ(ierr);
#endif
  } else {
    garray = aij->garray;
  }
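  /*
     A small worked example (illustrative only, not from the original source): if the rows of
     B reference the global columns {7,3,12,7}, the code above finds ec = 3 distinct columns,
     builds garray = {3,7,12}, and rewrites B's column indices in place as the compact local
     ids {1,0,2,1}.  The multiply can then use a work vector of length ec rather than one of
     length mat->cmap->N.
  */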
  if (!aij->lvec) {
    if (!aij->B) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing B mat");
    ierr = MatCreateVecs(aij->B,&aij->lvec,NULL);CHKERRQ(ierr);
  }
  ierr = VecGetSize(aij->lvec,&ec);CHKERRQ(ierr);

  /* create two temporary index sets used to build the scatter */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
  ierr = ISCreateStride(PETSC_COMM_SELF,ec,0,1,&to);CHKERRQ(ierr);

  /* create temporary global vector to generate scatter context */
  /* this does not allocate the array's memory so it is efficient */
  ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);CHKERRQ(ierr);

  /* generate the scatter context */
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterCreate(gvec,from,aij->lvec,to,&aij->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterViewFromOptions(aij->Mvctx,(PetscObject)mat,"-matmult_vecscatter_view");CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)aij->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,ec*sizeof(PetscInt));CHKERRQ(ierr);
  aij->garray = garray;

  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr);

  ierr = ISDestroy(&from);CHKERRQ(ierr);
  ierr = ISDestroy(&to);CHKERRQ(ierr);
  ierr = VecDestroy(&gvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
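/*
   How the objects built above are used in the parallel multiply y = A x (a sketch for
   orientation only; see MatMult_MPIAIJ() for the actual routine):

      VecScatterBegin(aij->Mvctx,x,aij->lvec,INSERT_VALUES,SCATTER_FORWARD); // gather ghost entries of x
      MatMult(aij->A,x,y);                 // "diagonal" block, overlapped with the communication
      VecScatterEnd(aij->Mvctx,x,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);
      MatMultAdd(aij->B,aij->lvec,y,y);    // "off-diagonal" block, using the compacted columns
*/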
/* Disassemble the off-diagonal portion of the MPIAIJXxx matrix.
   In other words, change B from the reduced format using local col ids
   to the expanded format using global col ids, which makes it easier to
   insert new nonzeros (with global col ids) into the matrix.
   The off-diagonal B determines the communication in the matrix-vector multiply.
*/
PetscErrorCode MatDisAssemble_MPIAIJ(Mat A)
{
  Mat_MPIAIJ        *aij  = (Mat_MPIAIJ*)A->data;
  Mat               B     = aij->B,Bnew;
  Mat_SeqAIJ        *Baij = (Mat_SeqAIJ*)B->data;
  PetscErrorCode    ierr;
  PetscInt          i,j,m = B->rmap->n,n = A->cmap->N,col,ct = 0,*garray = aij->garray,*nz,ec;
  PetscScalar       v;
  const PetscScalar *ba;

  PetscFunctionBegin;
  /* free stuff related to matrix-vector multiply */
  ierr = VecGetSize(aij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  if (aij->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
    ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)A,-aij->B->cmap->n*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  }

  /* make sure that B is assembled so we can access its values */
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* invent new B and copy stuff over */
  ierr = PetscMalloc1(m+1,&nz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    nz[i] = Baij->i[i+1] - Baij->i[i];
  }
  ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr);
  ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr); /* Bnew now uses A->cmap->N as its col size */
  ierr = MatSetBlockSizesFromMats(Bnew,A,A);CHKERRQ(ierr);
  ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(Bnew,0,nz);CHKERRQ(ierr);

  if (Baij->nonew >= 0) { /* inherit the nonew setting unless it is one of the (negative) insertion-error options */
    ((Mat_SeqAIJ*)Bnew->data)->nonew = Baij->nonew;
  }

  /*
     Ensure that B's nonzerostate is monotonically increasing.
     Or should this follow the MatSetValues() loop to preserve B's nonzerostate across a MatDisAssemble() call?
  */
  Bnew->nonzerostate = B->nonzerostate;

  ierr = PetscFree(nz);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(B,&ba);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    for (j=Baij->i[i]; j<Baij->i[i+1]; j++) {
      col  = garray[Baij->j[ct]];
      v    = ba[ct++];
      ierr = MatSetValues(Bnew,1,&i,1,&col,&v,B->insertmode);CHKERRQ(ierr);
    }
  }
  ierr = MatSeqAIJRestoreArrayRead(B,&ba);CHKERRQ(ierr);

  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)Bnew);CHKERRQ(ierr);

  aij->B           = Bnew;
  A->was_assembled = PETSC_FALSE;
  PetscFunctionReturn(0);
}
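/*
   Illustrative note (not from the original source): MatDisAssemble_MPIAIJ() undoes the column
   compaction performed in MatSetUpMultiply_MPIAIJ().  Continuing the small example above, an
   entry of B stored with the compact local column id 1 is re-inserted into Bnew with the global
   column id garray[1] = 7.  Since aij->garray is freed here, it is rebuilt (along with lvec and
   Mvctx) by the next call to MatSetUpMultiply_MPIAIJ() when the matrix is assembled again.
*/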
/* ugly stuff added for Glenn; someday we should fix this up */

static PetscInt *auglyrmapd = NULL,*auglyrmapo = NULL; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */
static Vec      auglydd = NULL,auglyoo = NULL;         /* work vectors used to scale the two parts of the local matrix */

PetscErrorCode MatMPIAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale)
{
  Mat_MPIAIJ     *ina = (Mat_MPIAIJ*) inA->data; /* access private part of matrix */
  PetscErrorCode ierr;
  PetscInt       i,n,nt,cstart,cend,no,*garray = ina->garray,*lindices;
  PetscInt       *r_rmapd,*r_rmapo;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr);
  ierr = MatGetSize(ina->A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(inA->rmap->mapping->n+1,&r_rmapd);CHKERRQ(ierr);
  nt   = 0;
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (inA->rmap->mapping->indices[i] >= cstart && inA->rmap->mapping->indices[i] < cend) {
      nt++;
      r_rmapd[i] = inA->rmap->mapping->indices[i] + 1;
    }
  }
  if (nt != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %" PetscInt_FMT " n %" PetscInt_FMT,nt,n);
  ierr = PetscMalloc1(n+1,&auglyrmapd);CHKERRQ(ierr);
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (r_rmapd[i]) {
      auglyrmapd[(r_rmapd[i]-1)-cstart] = i;
    }
  }
  ierr = PetscFree(r_rmapd);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF,n,&auglydd);CHKERRQ(ierr);

  ierr = PetscCalloc1(inA->cmap->N+1,&lindices);CHKERRQ(ierr);
  for (i=0; i<ina->B->cmap->n; i++) {
    lindices[garray[i]] = i+1;
  }
  no   = inA->rmap->mapping->n - nt;
  ierr = PetscCalloc1(inA->rmap->mapping->n+1,&r_rmapo);CHKERRQ(ierr);
  nt   = 0;
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (lindices[inA->rmap->mapping->indices[i]]) {
      nt++;
      r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]];
    }
  }
  if (nt > no) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %" PetscInt_FMT " no %" PetscInt_FMT,nt,no);
  ierr = PetscFree(lindices);CHKERRQ(ierr);
  ierr = PetscMalloc1(nt+1,&auglyrmapo);CHKERRQ(ierr);
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (r_rmapo[i]) {
      auglyrmapo[(r_rmapo[i]-1)] = i;
    }
  }
  ierr = PetscFree(r_rmapo);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF,nt,&auglyoo);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMPIAIJDiagonalScaleLocal(Mat A,Vec scale)
{
  /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal() */
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(A,"MatDiagonalScaleLocal_C",(Mat,Vec),(A,scale));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalScaleLocal_MPIAIJ(Mat A,Vec scale)
{
  Mat_MPIAIJ        *a = (Mat_MPIAIJ*) A->data; /* access private part of matrix */
  PetscErrorCode    ierr;
  PetscInt          n,i;
  PetscScalar       *d,*o;
  const PetscScalar *s;

  PetscFunctionBegin;
  if (!auglyrmapd) {
    ierr = MatMPIAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr);
  }

  ierr = VecGetArrayRead(scale,&s);CHKERRQ(ierr);

  ierr = VecGetLocalSize(auglydd,&n);CHKERRQ(ierr);
  ierr = VecGetArray(auglydd,&d);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    d[i] = s[auglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */
  }
  ierr = VecRestoreArray(auglydd,&d);CHKERRQ(ierr);
  /* column scale "diagonal" portion of local matrix */
  ierr = MatDiagonalScale(a->A,NULL,auglydd);CHKERRQ(ierr);

  ierr = VecGetLocalSize(auglyoo,&n);CHKERRQ(ierr);
  ierr = VecGetArray(auglyoo,&o);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    o[i] = s[auglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
  }
  ierr = VecRestoreArrayRead(scale,&s);CHKERRQ(ierr);
  ierr = VecRestoreArray(auglyoo,&o);CHKERRQ(ierr);
  /* column scale "off-diagonal" portion of local matrix */
  ierr = MatDiagonalScale(a->B,NULL,auglyoo);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
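/*
   Usage note (illustrative only, not from the original source): the scale vector passed to
   MatDiagonalScaleLocal_MPIAIJ() is indexed by A's local-to-global mapping (which must have been
   set, e.g. with MatSetLocalToGlobalMapping(), since inA->rmap->mapping is accessed directly in
   the setup above); its entries are split into auglydd for locally owned columns and auglyoo for
   the ghost columns listed in garray, which then column-scale a->A and a->B respectively.  The
   static auglyrmapd/auglyrmapo/auglydd/auglyoo are built once, for the first matrix this is
   called on; that limitation is part of what the "ugly stuff" comment above refers to.
*/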