1 2 /* 3 Support for the parallel AIJ matrix vector multiply 4 */ 5 #include <../src/mat/impls/aij/mpi/mpiaij.h> 6 #include <petsc/private/vecimpl.h> 7 #include <petsc/private/isimpl.h> /* needed because accesses data structure of ISLocalToGlobalMapping directly */ 8 9 PetscErrorCode MatSetUpMultiply_MPIAIJ(Mat mat) 10 { 11 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 12 Mat_SeqAIJ *B = (Mat_SeqAIJ*)(aij->B->data); 13 PetscErrorCode ierr; 14 PetscInt i,j,*aj = B->j,ec = 0,*garray; 15 IS from,to; 16 Vec gvec; 17 #if defined(PETSC_USE_CTABLE) 18 PetscTable gid1_lid1; 19 PetscTablePosition tpos; 20 PetscInt gid,lid; 21 #else 22 PetscInt N = mat->cmap->N,*indices; 23 #endif 24 25 PetscFunctionBegin; 26 if (!aij->garray) { 27 #if defined(PETSC_USE_CTABLE) 28 /* use a table */ 29 ierr = PetscTableCreate(aij->B->rmap->n,mat->cmap->N+1,&gid1_lid1);CHKERRQ(ierr); 30 for (i=0; i<aij->B->rmap->n; i++) { 31 for (j=0; j<B->ilen[i]; j++) { 32 PetscInt data,gid1 = aj[B->i[i] + j] + 1; 33 ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr); 34 if (!data) { 35 /* one based table */ 36 ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr); 37 } 38 } 39 } 40 /* form array of columns we need */ 41 ierr = PetscMalloc1(ec+1,&garray);CHKERRQ(ierr); 42 ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 43 while (tpos) { 44 ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 45 gid--; 46 lid--; 47 garray[lid] = gid; 48 } 49 ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); /* sort, and rebuild */ 50 ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 51 for (i=0; i<ec; i++) { 52 ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 53 } 54 /* compact out the extra columns in B */ 55 for (i=0; i<aij->B->rmap->n; i++) { 56 for (j=0; j<B->ilen[i]; j++) { 57 PetscInt gid1 = aj[B->i[i] + j] + 1; 58 ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 59 lid--; 60 aj[B->i[i] + j] = lid; 61 } 62 } 63 ierr = PetscLayoutDestroy(&aij->B->cmap);CHKERRQ(ierr); 64 ierr = PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)aij->B),ec,ec,1,&aij->B->cmap);CHKERRQ(ierr); 65 ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr); 66 #else 67 /* Make an array as long as the number of columns */ 68 /* mark those columns that are in aij->B */ 69 ierr = PetscCalloc1(N+1,&indices);CHKERRQ(ierr); 70 for (i=0; i<aij->B->rmap->n; i++) { 71 for (j=0; j<B->ilen[i]; j++) { 72 if (!indices[aj[B->i[i] + j]]) ec++; 73 indices[aj[B->i[i] + j]] = 1; 74 } 75 } 76 77 /* form array of columns we need */ 78 ierr = PetscMalloc1(ec+1,&garray);CHKERRQ(ierr); 79 ec = 0; 80 for (i=0; i<N; i++) { 81 if (indices[i]) garray[ec++] = i; 82 } 83 84 /* make indices now point into garray */ 85 for (i=0; i<ec; i++) { 86 indices[garray[i]] = i; 87 } 88 89 /* compact out the extra columns in B */ 90 for (i=0; i<aij->B->rmap->n; i++) { 91 for (j=0; j<B->ilen[i]; j++) { 92 aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 93 } 94 } 95 ierr = PetscLayoutDestroy(&aij->B->cmap);CHKERRQ(ierr); 96 ierr = PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)aij->B),ec,ec,1,&aij->B->cmap);CHKERRQ(ierr); 97 ierr = PetscFree(indices);CHKERRQ(ierr); 98 #endif 99 } else { 100 garray = aij->garray; 101 } 102 103 if (!aij->lvec) { 104 /* create local vector that is used to scatter into */ 105 ierr = VecCreateSeq(PETSC_COMM_SELF,ec,&aij->lvec);CHKERRQ(ierr); 106 } else { 107 ierr = VecGetSize(aij->lvec,&ec);CHKERRQ(ierr); 108 } 109 110 /* create two temporary Index sets for build scatter gather */ 111 ierr = ISCreateGeneral(PETSC_COMM_SELF,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 112 ierr = ISCreateStride(PETSC_COMM_SELF,ec,0,1,&to);CHKERRQ(ierr); 113 114 /* create temporary global vector to generate scatter context */ 115 /* This does not allocate the array's memory so is efficient */ 116 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);CHKERRQ(ierr); 117 118 /* generate the scatter context */ 119 if (aij->Mvctx_mpi1_flg) { 120 ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr); 121 ierr = VecScatterCreate(gvec,from,aij->lvec,to,&aij->Mvctx_mpi1);CHKERRQ(ierr); 122 ierr = VecScatterSetType(aij->Mvctx_mpi1,VECSCATTERMPI1);CHKERRQ(ierr); 123 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)aij->Mvctx_mpi1);CHKERRQ(ierr); 124 } else { 125 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 126 ierr = VecScatterCreate(gvec,from,aij->lvec,to,&aij->Mvctx);CHKERRQ(ierr); 127 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)aij->Mvctx);CHKERRQ(ierr); 128 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)aij->lvec);CHKERRQ(ierr); 129 ierr = PetscLogObjectMemory((PetscObject)mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr); 130 } 131 aij->garray = garray; 132 133 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr); 134 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr); 135 136 ierr = ISDestroy(&from);CHKERRQ(ierr); 137 ierr = ISDestroy(&to);CHKERRQ(ierr); 138 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 139 PetscFunctionReturn(0); 140 } 141 142 /* 143 Takes the local part of an already assembled MPIAIJ matrix 144 and disassembles it. This is to allow new nonzeros into the matrix 145 that require more communication in the matrix vector multiply. 146 Thus certain data-structures must be rebuilt. 147 148 Kind of slow! But that's what application programmers get when 149 they are sloppy. 150 */ 151 PetscErrorCode MatDisAssemble_MPIAIJ(Mat A) 152 { 153 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 154 Mat B = aij->B,Bnew; 155 Mat_SeqAIJ *Baij = (Mat_SeqAIJ*)B->data; 156 PetscErrorCode ierr; 157 PetscInt i,j,m = B->rmap->n,n = A->cmap->N,col,ct = 0,*garray = aij->garray,*nz,ec; 158 PetscScalar v; 159 160 PetscFunctionBegin; 161 /* free stuff related to matrix-vec multiply */ 162 ierr = VecGetSize(aij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 163 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 164 if (aij->colmap) { 165 #if defined(PETSC_USE_CTABLE) 166 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 167 #else 168 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 169 ierr = PetscLogObjectMemory((PetscObject)A,-aij->B->cmap->n*sizeof(PetscInt));CHKERRQ(ierr); 170 #endif 171 } 172 173 /* make sure that B is assembled so we can access its values */ 174 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 175 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 176 177 /* invent new B and copy stuff over */ 178 ierr = PetscMalloc1(m+1,&nz);CHKERRQ(ierr); 179 for (i=0; i<m; i++) { 180 nz[i] = Baij->i[i+1] - Baij->i[i]; 181 } 182 ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr); 183 ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr); 184 ierr = MatSetBlockSizesFromMats(Bnew,A,A);CHKERRQ(ierr); 185 ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr); 186 ierr = MatSeqAIJSetPreallocation(Bnew,0,nz);CHKERRQ(ierr); 187 188 if (Baij->nonew >= 0) { /* Inherit insertion error options (if positive). */ 189 ((Mat_SeqAIJ*)Bnew->data)->nonew = Baij->nonew; 190 } 191 192 /* 193 Ensure that B's nonzerostate is monotonically increasing. 194 Or should this follow the MatSetValues() loop to preserve B's nonzerstate across a MatDisAssemble() call? 195 */ 196 Bnew->nonzerostate = B->nonzerostate; 197 198 ierr = PetscFree(nz);CHKERRQ(ierr); 199 for (i=0; i<m; i++) { 200 for (j=Baij->i[i]; j<Baij->i[i+1]; j++) { 201 col = garray[Baij->j[ct]]; 202 v = Baij->a[ct++]; 203 ierr = MatSetValues(Bnew,1,&i,1,&col,&v,B->insertmode);CHKERRQ(ierr); 204 } 205 } 206 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 207 ierr = PetscLogObjectMemory((PetscObject)A,-ec*sizeof(PetscInt));CHKERRQ(ierr); 208 ierr = MatDestroy(&B);CHKERRQ(ierr); 209 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)Bnew);CHKERRQ(ierr); 210 211 aij->B = Bnew; 212 A->was_assembled = PETSC_FALSE; 213 PetscFunctionReturn(0); 214 } 215 216 /* ugly stuff added for Glenn someday we should fix this up */ 217 218 static PetscInt *auglyrmapd = 0,*auglyrmapo = 0; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */ 219 static Vec auglydd = 0,auglyoo = 0; /* work vectors used to scale the two parts of the local matrix */ 220 221 222 PetscErrorCode MatMPIAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale) 223 { 224 Mat_MPIAIJ *ina = (Mat_MPIAIJ*) inA->data; /*access private part of matrix */ 225 PetscErrorCode ierr; 226 PetscInt i,n,nt,cstart,cend,no,*garray = ina->garray,*lindices; 227 PetscInt *r_rmapd,*r_rmapo; 228 229 PetscFunctionBegin; 230 ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr); 231 ierr = MatGetSize(ina->A,NULL,&n);CHKERRQ(ierr); 232 ierr = PetscCalloc1(inA->rmap->mapping->n+1,&r_rmapd);CHKERRQ(ierr); 233 nt = 0; 234 for (i=0; i<inA->rmap->mapping->n; i++) { 235 if (inA->rmap->mapping->indices[i] >= cstart && inA->rmap->mapping->indices[i] < cend) { 236 nt++; 237 r_rmapd[i] = inA->rmap->mapping->indices[i] + 1; 238 } 239 } 240 if (nt != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D n %D",nt,n); 241 ierr = PetscMalloc1(n+1,&auglyrmapd);CHKERRQ(ierr); 242 for (i=0; i<inA->rmap->mapping->n; i++) { 243 if (r_rmapd[i]) { 244 auglyrmapd[(r_rmapd[i]-1)-cstart] = i; 245 } 246 } 247 ierr = PetscFree(r_rmapd);CHKERRQ(ierr); 248 ierr = VecCreateSeq(PETSC_COMM_SELF,n,&auglydd);CHKERRQ(ierr); 249 250 ierr = PetscCalloc1(inA->cmap->N+1,&lindices);CHKERRQ(ierr); 251 for (i=0; i<ina->B->cmap->n; i++) { 252 lindices[garray[i]] = i+1; 253 } 254 no = inA->rmap->mapping->n - nt; 255 ierr = PetscCalloc1(inA->rmap->mapping->n+1,&r_rmapo);CHKERRQ(ierr); 256 nt = 0; 257 for (i=0; i<inA->rmap->mapping->n; i++) { 258 if (lindices[inA->rmap->mapping->indices[i]]) { 259 nt++; 260 r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]]; 261 } 262 } 263 if (nt > no) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,n); 264 ierr = PetscFree(lindices);CHKERRQ(ierr); 265 ierr = PetscMalloc1(nt+1,&auglyrmapo);CHKERRQ(ierr); 266 for (i=0; i<inA->rmap->mapping->n; i++) { 267 if (r_rmapo[i]) { 268 auglyrmapo[(r_rmapo[i]-1)] = i; 269 } 270 } 271 ierr = PetscFree(r_rmapo);CHKERRQ(ierr); 272 ierr = VecCreateSeq(PETSC_COMM_SELF,nt,&auglyoo);CHKERRQ(ierr); 273 PetscFunctionReturn(0); 274 } 275 276 PetscErrorCode MatMPIAIJDiagonalScaleLocal(Mat A,Vec scale) 277 { 278 /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */ 279 PetscErrorCode ierr; 280 281 PetscFunctionBegin; 282 ierr = PetscTryMethod(A,"MatDiagonalScaleLocal_C",(Mat,Vec),(A,scale));CHKERRQ(ierr); 283 PetscFunctionReturn(0); 284 } 285 286 PetscErrorCode MatDiagonalScaleLocal_MPIAIJ(Mat A,Vec scale) 287 { 288 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; /*access private part of matrix */ 289 PetscErrorCode ierr; 290 PetscInt n,i; 291 PetscScalar *d,*o; 292 const PetscScalar *s; 293 294 PetscFunctionBegin; 295 if (!auglyrmapd) { 296 ierr = MatMPIAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr); 297 } 298 299 ierr = VecGetArrayRead(scale,&s);CHKERRQ(ierr); 300 301 ierr = VecGetLocalSize(auglydd,&n);CHKERRQ(ierr); 302 ierr = VecGetArray(auglydd,&d);CHKERRQ(ierr); 303 for (i=0; i<n; i++) { 304 d[i] = s[auglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ 305 } 306 ierr = VecRestoreArray(auglydd,&d);CHKERRQ(ierr); 307 /* column scale "diagonal" portion of local matrix */ 308 ierr = MatDiagonalScale(a->A,NULL,auglydd);CHKERRQ(ierr); 309 310 ierr = VecGetLocalSize(auglyoo,&n);CHKERRQ(ierr); 311 ierr = VecGetArray(auglyoo,&o);CHKERRQ(ierr); 312 for (i=0; i<n; i++) { 313 o[i] = s[auglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ 314 } 315 ierr = VecRestoreArrayRead(scale,&s);CHKERRQ(ierr); 316 ierr = VecRestoreArray(auglyoo,&o);CHKERRQ(ierr); 317 /* column scale "off-diagonal" portion of local matrix */ 318 ierr = MatDiagonalScale(a->B,NULL,auglyoo);CHKERRQ(ierr); 319 PetscFunctionReturn(0); 320 } 321