/*
   Support for the parallel AIJ matrix-vector multiply
*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petsc/private/vecimpl.h>
#include <petsc/private/isimpl.h> /* needed because this file accesses the ISLocalToGlobalMapping data structure directly */

PetscErrorCode MatSetUpMultiply_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)(aij->B->data);
  PetscErrorCode ierr;
  PetscInt       i,j,*aj = B->j,ec = 0,*garray;
  IS             from,to;
  Vec            gvec;
#if defined(PETSC_USE_CTABLE)
  PetscTable         gid1_lid1;
  PetscTablePosition tpos;
  PetscInt           gid,lid;
#else
  PetscInt N = mat->cmap->N,*indices;
#endif

  PetscFunctionBegin;
  if (!aij->garray) {
#if defined(PETSC_USE_CTABLE)
    /* use a table */
    ierr = PetscTableCreate(aij->B->rmap->n,mat->cmap->N+1,&gid1_lid1);CHKERRQ(ierr);
    for (i=0; i<aij->B->rmap->n; i++) {
      for (j=0; j<B->ilen[i]; j++) {
        PetscInt data,gid1 = aj[B->i[i] + j] + 1;
        ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
        if (!data) {
          /* one based table */
          ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr);
        }
      }
    }
    /* form array of columns we need */
    ierr = PetscMalloc1(ec+1,&garray);CHKERRQ(ierr);
    ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
    while (tpos) {
      ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
      gid--;
      lid--;
      garray[lid] = gid;
    }
    ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); /* sort, and rebuild */
    ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
    for (i=0; i<ec; i++) {
      ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
    }
    /* compact out the extra columns in B */
    for (i=0; i<aij->B->rmap->n; i++) {
      for (j=0; j<B->ilen[i]; j++) {
        PetscInt gid1 = aj[B->i[i] + j] + 1;
        ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
        lid--;
        aj[B->i[i] + j] = lid;
      }
    }
    aij->B->cmap->n  = aij->B->cmap->N = ec;
    aij->B->cmap->bs = 1;

    ierr = PetscLayoutSetUp(aij->B->cmap);CHKERRQ(ierr);
    ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr);
#else
    /* Make an array as long as the number of columns */
    /* mark those columns that are in aij->B */
    ierr = PetscCalloc1(N+1,&indices);CHKERRQ(ierr);
    for (i=0; i<aij->B->rmap->n; i++) {
      for (j=0; j<B->ilen[i]; j++) {
        if (!indices[aj[B->i[i] + j]]) ec++;
        indices[aj[B->i[i] + j]] = 1;
      }
    }

    /* form array of columns we need */
    ierr = PetscMalloc1(ec+1,&garray);CHKERRQ(ierr);
    ec   = 0;
    for (i=0; i<N; i++) {
      if (indices[i]) garray[ec++] = i;
    }

    /* make indices now point into garray */
    for (i=0; i<ec; i++) {
      indices[garray[i]] = i;
    }

    /* compact out the extra columns in B */
    for (i=0; i<aij->B->rmap->n; i++) {
      for (j=0; j<B->ilen[i]; j++) {
        aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
      }
    }
    aij->B->cmap->n  = aij->B->cmap->N = ec;
    aij->B->cmap->bs = 1;

    ierr = PetscLayoutSetUp(aij->B->cmap);CHKERRQ(ierr);
    ierr = PetscFree(indices);CHKERRQ(ierr);
#endif
  } else {
    garray = aij->garray;
  }

  if (!aij->lvec) {
    /* create local vector that is used to scatter into */
    ierr = VecCreateSeq(PETSC_COMM_SELF,ec,&aij->lvec);CHKERRQ(ierr);
  } else {
    ierr = VecGetSize(aij->lvec,&ec);CHKERRQ(ierr);
  }
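  /*
     At this point garray[] maps the compacted local column numbering of B,
     0..ec-1, to global column indices, and lvec has length ec. The scatter
     built below gathers the needed off-process entries of the input vector
     into lvec, so that MatMult() can form y = aij->A*x_local + aij->B*lvec.
  */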
  /* create two temporary index sets used to build the scatter */
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
  ierr = ISCreateStride(PETSC_COMM_SELF,ec,0,1,&to);CHKERRQ(ierr);

  /* create temporary global vector to generate scatter context */
  /* this does not allocate the array's memory, so it is efficient */
  ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);CHKERRQ(ierr);

  /* generate the scatter context */
  if (aij->Mvctx_mpi1_flg) {
    ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);
    ierr = VecScatterCreate(gvec,from,aij->lvec,to,&aij->Mvctx_mpi1);CHKERRQ(ierr);
    ierr = VecScatterSetType(aij->Mvctx_mpi1,VECSCATTERMPI1);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)aij->Mvctx_mpi1);CHKERRQ(ierr);
  } else {
    ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterCreate(gvec,from,aij->lvec,to,&aij->Mvctx);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)aij->Mvctx);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)aij->lvec);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
  }
  aij->garray = garray;

  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr);

  ierr = ISDestroy(&from);CHKERRQ(ierr);
  ierr = ISDestroy(&to);CHKERRQ(ierr);
  ierr = VecDestroy(&gvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   Takes the local part of an already assembled MPIAIJ matrix
   and disassembles it. This is done to allow new nonzeros into the matrix
   that require more communication in the matrix-vector multiply.
   Thus certain data structures must be rebuilt.

   Kind of slow! But that's what application programmers get when
   they are sloppy.
*/
PetscErrorCode MatDisAssemble_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)A->data;
  Mat            B     = aij->B,Bnew;
  Mat_SeqAIJ     *Baij = (Mat_SeqAIJ*)B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,m = B->rmap->n,n = A->cmap->N,col,ct = 0,*garray = aij->garray,*nz,ec;
  PetscScalar    v;

  PetscFunctionBegin;
  /* free stuff related to matrix-vector multiply */
  ierr = VecGetSize(aij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  if (aij->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
    ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)A,-aij->B->cmap->n*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  }

  /* make sure that B is assembled so we can access its values */
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* invent new B and copy stuff over */
  ierr = PetscMalloc1(m+1,&nz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    nz[i] = Baij->i[i+1] - Baij->i[i];
  }
  ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr);
  ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(Bnew,A,A);CHKERRQ(ierr);
  ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(Bnew,0,nz);CHKERRQ(ierr);
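  /*
     Bnew is created with the full global column count n = A->cmap->N, so the
     MatSetValues() loop below can insert entries by global column index;
     garray[] undoes the compacted local column numbering of the old B.
  */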
  if (Baij->nonew >= 0) { /* inherit insertion error options (if nonnegative) */
    ((Mat_SeqAIJ*)Bnew->data)->nonew = Baij->nonew;
  }

  /*
     Ensure that B's nonzerostate is monotonically increasing.
     Or should this follow the MatSetValues() loop to preserve B's nonzerostate across a MatDisAssemble() call?
  */
  Bnew->nonzerostate = B->nonzerostate;

  ierr = PetscFree(nz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    for (j=Baij->i[i]; j<Baij->i[i+1]; j++) {
      col  = garray[Baij->j[ct]];
      v    = Baij->a[ct++];
      ierr = MatSetValues(Bnew,1,&i,1,&col,&v,B->insertmode);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)Bnew);CHKERRQ(ierr);

  aij->B           = Bnew;
  A->was_assembled = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* ugly stuff added for Glenn; someday we should fix this up */

static PetscInt *auglyrmapd = NULL,*auglyrmapo = NULL; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */
static Vec      auglydd = NULL,auglyoo = NULL;         /* work vectors used to scale the two parts of the local matrix */

PetscErrorCode MatMPIAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale)
{
  Mat_MPIAIJ     *ina = (Mat_MPIAIJ*)inA->data; /* access private part of matrix */
  PetscErrorCode ierr;
  PetscInt       i,n,nt,cstart,cend,no,*garray = ina->garray,*lindices;
  PetscInt       *r_rmapd,*r_rmapo;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr);
  ierr = MatGetSize(ina->A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(inA->rmap->mapping->n+1,&r_rmapd);CHKERRQ(ierr);
  nt   = 0;
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (inA->rmap->mapping->indices[i] >= cstart && inA->rmap->mapping->indices[i] < cend) {
      nt++;
      r_rmapd[i] = inA->rmap->mapping->indices[i] + 1;
    }
  }
  if (nt != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D n %D",nt,n);
  ierr = PetscMalloc1(n+1,&auglyrmapd);CHKERRQ(ierr);
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (r_rmapd[i]) {
      auglyrmapd[(r_rmapd[i]-1)-cstart] = i;
    }
  }
  ierr = PetscFree(r_rmapd);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF,n,&auglydd);CHKERRQ(ierr);

  ierr = PetscCalloc1(inA->cmap->N+1,&lindices);CHKERRQ(ierr);
  for (i=0; i<ina->B->cmap->n; i++) {
    lindices[garray[i]] = i+1;
  }
  no   = inA->rmap->mapping->n - nt;
  ierr = PetscCalloc1(inA->rmap->mapping->n+1,&r_rmapo);CHKERRQ(ierr);
  nt   = 0;
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (lindices[inA->rmap->mapping->indices[i]]) {
      nt++;
      r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]];
    }
  }
  if (nt > no) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,no);
  ierr = PetscFree(lindices);CHKERRQ(ierr);
  ierr = PetscMalloc1(nt+1,&auglyrmapo);CHKERRQ(ierr);
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (r_rmapo[i]) {
      auglyrmapo[(r_rmapo[i]-1)] = i;
    }
  }
  ierr = PetscFree(r_rmapo);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF,nt,&auglyoo);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
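/*
   A small worked example of the mappings built above (made-up numbers, not
   from any actual run): suppose this process owns global rows/columns 4..7
   (cstart=4, cend=8) and its local-to-global mapping lists {5,4,9,6,7,2},
   with garray = {2,9} for the off-process columns. Then the owned entries
   give auglyrmapd = {1,0,3,4} (globals 4,5,6,7 live at local positions
   1,0,3,4), and the ghost entries give auglyrmapo = {5,2} (garray slots 0
   and 1 correspond to local positions 5 and 2).
*/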
PetscErrorCode MatMPIAIJDiagonalScaleLocal(Mat A,Vec scale)
{
  /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal() */
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(A,"MatDiagonalScaleLocal_C",(Mat,Vec),(A,scale));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalScaleLocal_MPIAIJ(Mat A,Vec scale)
{
  Mat_MPIAIJ        *a = (Mat_MPIAIJ*)A->data; /* access private part of matrix */
  PetscErrorCode    ierr;
  PetscInt          n,i;
  PetscScalar       *d,*o;
  const PetscScalar *s;

  PetscFunctionBegin;
  if (!auglyrmapd) {
    ierr = MatMPIAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr);
  }

  ierr = VecGetArrayRead(scale,&s);CHKERRQ(ierr);

  ierr = VecGetLocalSize(auglydd,&n);CHKERRQ(ierr);
  ierr = VecGetArray(auglydd,&d);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    d[i] = s[auglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */
  }
  ierr = VecRestoreArray(auglydd,&d);CHKERRQ(ierr);
  /* column scale "diagonal" portion of local matrix */
  ierr = MatDiagonalScale(a->A,NULL,auglydd);CHKERRQ(ierr);

  ierr = VecGetLocalSize(auglyoo,&n);CHKERRQ(ierr);
  ierr = VecGetArray(auglyoo,&o);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    o[i] = s[auglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
  }
  ierr = VecRestoreArrayRead(scale,&s);CHKERRQ(ierr);
  ierr = VecRestoreArray(auglyoo,&o);CHKERRQ(ierr);
  /* column scale "off-diagonal" portion of local matrix */
  ierr = MatDiagonalScale(a->B,NULL,auglyoo);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
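/*
   A minimal usage sketch (hypothetical code, under the assumption that A is
   an MPIAIJ matrix with a local-to-global mapping already attached via
   MatSetLocalToGlobalMapping(); the setup routine above dereferences
   inA->rmap->mapping, so the mapping must exist):

     ISLocalToGlobalMapping map;
     PetscInt               nloc;
     Vec                    scale;

     MatGetLocalToGlobalMapping(A,&map,NULL);
     ISLocalToGlobalMappingGetSize(map,&nloc);    // owned plus ghost slots
     VecCreateSeq(PETSC_COMM_SELF,nloc,&scale);
     // ... fill scale in the local ordering ...
     MatMPIAIJDiagonalScaleLocal(A,scale);        // column-scales a->A and a->B in place
     VecDestroy(&scale);
*/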