1 2 /* 3 Support for the parallel AIJ matrix vector multiply 4 */ 5 #include <../src/mat/impls/aij/mpi/mpiaij.h> 6 #include <petsc/private/isimpl.h> /* needed because accesses data structure of ISLocalToGlobalMapping directly */ 7 8 PetscErrorCode MatSetUpMultiply_MPIAIJ(Mat mat) 9 { 10 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 11 Mat_SeqAIJ *B = (Mat_SeqAIJ*)(aij->B->data); 12 PetscErrorCode ierr; 13 PetscInt i,j,*aj = B->j,ec = 0,*garray; 14 IS from,to; 15 Vec gvec; 16 #if defined(PETSC_USE_CTABLE) 17 PetscTable gid1_lid1; 18 PetscTablePosition tpos; 19 PetscInt gid,lid; 20 #else 21 PetscInt N = mat->cmap->N,*indices; 22 #endif 23 24 PetscFunctionBegin; 25 if (!aij->garray) { 26 #if defined(PETSC_USE_CTABLE) 27 /* use a table */ 28 ierr = PetscTableCreate(aij->B->rmap->n,mat->cmap->N+1,&gid1_lid1);CHKERRQ(ierr); 29 for (i=0; i<aij->B->rmap->n; i++) { 30 for (j=0; j<B->ilen[i]; j++) { 31 PetscInt data,gid1 = aj[B->i[i] + j] + 1; 32 ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr); 33 if (!data) { 34 /* one based table */ 35 ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr); 36 } 37 } 38 } 39 /* form array of columns we need */ 40 ierr = PetscMalloc1(ec+1,&garray);CHKERRQ(ierr); 41 ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 42 while (tpos) { 43 ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 44 gid--; 45 lid--; 46 garray[lid] = gid; 47 } 48 ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); /* sort, and rebuild */ 49 ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 50 for (i=0; i<ec; i++) { 51 ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 52 } 53 /* compact out the extra columns in B */ 54 for (i=0; i<aij->B->rmap->n; i++) { 55 for (j=0; j<B->ilen[i]; j++) { 56 PetscInt gid1 = aj[B->i[i] + j] + 1; 57 ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 58 lid--; 59 aj[B->i[i] + j] = lid; 60 } 61 } 62 aij->B->cmap->n = aij->B->cmap->N = ec; 63 aij->B->cmap->bs = 1; 64 65 ierr = PetscLayoutSetUp((aij->B->cmap));CHKERRQ(ierr); 66 ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr); 67 #else 68 /* Make an array as long as the number of columns */ 69 /* mark those columns that are in aij->B */ 70 ierr = PetscCalloc1(N+1,&indices);CHKERRQ(ierr); 71 for (i=0; i<aij->B->rmap->n; i++) { 72 for (j=0; j<B->ilen[i]; j++) { 73 if (!indices[aj[B->i[i] + j]]) ec++; 74 indices[aj[B->i[i] + j]] = 1; 75 } 76 } 77 78 /* form array of columns we need */ 79 ierr = PetscMalloc1(ec+1,&garray);CHKERRQ(ierr); 80 ec = 0; 81 for (i=0; i<N; i++) { 82 if (indices[i]) garray[ec++] = i; 83 } 84 85 /* make indices now point into garray */ 86 for (i=0; i<ec; i++) { 87 indices[garray[i]] = i; 88 } 89 90 /* compact out the extra columns in B */ 91 for (i=0; i<aij->B->rmap->n; i++) { 92 for (j=0; j<B->ilen[i]; j++) { 93 aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 94 } 95 } 96 aij->B->cmap->n = aij->B->cmap->N = ec; 97 aij->B->cmap->bs = 1; 98 99 ierr = PetscLayoutSetUp((aij->B->cmap));CHKERRQ(ierr); 100 ierr = PetscFree(indices);CHKERRQ(ierr); 101 #endif 102 } else { 103 garray = aij->garray; 104 } 105 106 if (!aij->lvec) { 107 /* create local vector that is used to scatter into */ 108 ierr = VecCreateSeq(PETSC_COMM_SELF,ec,&aij->lvec);CHKERRQ(ierr); 109 } else { 110 ierr = VecGetSize(aij->lvec,&ec);CHKERRQ(ierr); 111 } 112 113 /* create two temporary Index sets for build scatter gather */ 114 ierr = ISCreateGeneral(((PetscObject)mat)->comm,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 115 116 ierr = ISCreateStride(PETSC_COMM_SELF,ec,0,1,&to);CHKERRQ(ierr); 117 118 /* create temporary global vector to generate scatter context */ 119 /* This does not allocate the array's memory so is efficient */ 120 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);CHKERRQ(ierr); 121 122 /* generate the scatter context */ 123 if (aij->Mvctx_mpi1_flg) { 124 ierr = VecScatterCreateMPI1(gvec,from,aij->lvec,to,&aij->Mvctx_mpi1);CHKERRQ(ierr); 125 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)aij->Mvctx_mpi1);CHKERRQ(ierr); 126 } else { 127 ierr = VecScatterCreate(gvec,from,aij->lvec,to,&aij->Mvctx);CHKERRQ(ierr); 128 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)aij->Mvctx);CHKERRQ(ierr); 129 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)aij->lvec);CHKERRQ(ierr); 130 ierr = PetscLogObjectMemory((PetscObject)mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr); 131 aij->garray = garray; 132 } 133 134 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr); 135 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr); 136 137 ierr = ISDestroy(&from);CHKERRQ(ierr); 138 ierr = ISDestroy(&to);CHKERRQ(ierr); 139 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 140 PetscFunctionReturn(0); 141 } 142 143 /* 144 Takes the local part of an already assembled MPIAIJ matrix 145 and disassembles it. This is to allow new nonzeros into the matrix 146 that require more communication in the matrix vector multiply. 147 Thus certain data-structures must be rebuilt. 148 149 Kind of slow! But that's what application programmers get when 150 they are sloppy. 151 */ 152 PetscErrorCode MatDisAssemble_MPIAIJ(Mat A) 153 { 154 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 155 Mat B = aij->B,Bnew; 156 Mat_SeqAIJ *Baij = (Mat_SeqAIJ*)B->data; 157 PetscErrorCode ierr; 158 PetscInt i,j,m = B->rmap->n,n = A->cmap->N,col,ct = 0,*garray = aij->garray,*nz,ec; 159 PetscScalar v; 160 161 PetscFunctionBegin; 162 /* free stuff related to matrix-vec multiply */ 163 ierr = VecGetSize(aij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 164 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 165 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 166 if (aij->colmap) { 167 #if defined(PETSC_USE_CTABLE) 168 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 169 #else 170 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 171 ierr = PetscLogObjectMemory((PetscObject)A,-aij->B->cmap->n*sizeof(PetscInt));CHKERRQ(ierr); 172 #endif 173 } 174 175 /* make sure that B is assembled so we can access its values */ 176 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 177 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 178 179 /* invent new B and copy stuff over */ 180 ierr = PetscMalloc1(m+1,&nz);CHKERRQ(ierr); 181 for (i=0; i<m; i++) { 182 nz[i] = Baij->i[i+1] - Baij->i[i]; 183 } 184 ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr); 185 ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr); 186 ierr = MatSetBlockSizesFromMats(Bnew,A,A);CHKERRQ(ierr); 187 ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr); 188 ierr = MatSeqAIJSetPreallocation(Bnew,0,nz);CHKERRQ(ierr); 189 190 ((Mat_SeqAIJ*)Bnew->data)->nonew = Baij->nonew; /* Inherit insertion error options. */ 191 /* 192 Ensure that B's nonzerostate is monotonically increasing. 193 Or should this follow the MatSetValues() loop to preserve B's nonzerstate across a MatDisAssemble() call? 194 */ 195 Bnew->nonzerostate = B->nonzerostate; 196 197 ierr = PetscFree(nz);CHKERRQ(ierr); 198 for (i=0; i<m; i++) { 199 for (j=Baij->i[i]; j<Baij->i[i+1]; j++) { 200 col = garray[Baij->j[ct]]; 201 v = Baij->a[ct++]; 202 ierr = MatSetValues(Bnew,1,&i,1,&col,&v,B->insertmode);CHKERRQ(ierr); 203 } 204 } 205 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 206 ierr = PetscLogObjectMemory((PetscObject)A,-ec*sizeof(PetscInt));CHKERRQ(ierr); 207 ierr = MatDestroy(&B);CHKERRQ(ierr); 208 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)Bnew);CHKERRQ(ierr); 209 210 aij->B = Bnew; 211 A->was_assembled = PETSC_FALSE; 212 PetscFunctionReturn(0); 213 } 214 215 /* ugly stuff added for Glenn someday we should fix this up */ 216 217 static PetscInt *auglyrmapd = 0,*auglyrmapo = 0; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */ 218 static Vec auglydd = 0,auglyoo = 0; /* work vectors used to scale the two parts of the local matrix */ 219 220 221 PetscErrorCode MatMPIAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale) 222 { 223 Mat_MPIAIJ *ina = (Mat_MPIAIJ*) inA->data; /*access private part of matrix */ 224 PetscErrorCode ierr; 225 PetscInt i,n,nt,cstart,cend,no,*garray = ina->garray,*lindices; 226 PetscInt *r_rmapd,*r_rmapo; 227 228 PetscFunctionBegin; 229 ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr); 230 ierr = MatGetSize(ina->A,NULL,&n);CHKERRQ(ierr); 231 ierr = PetscCalloc1(inA->rmap->mapping->n+1,&r_rmapd);CHKERRQ(ierr); 232 nt = 0; 233 for (i=0; i<inA->rmap->mapping->n; i++) { 234 if (inA->rmap->mapping->indices[i] >= cstart && inA->rmap->mapping->indices[i] < cend) { 235 nt++; 236 r_rmapd[i] = inA->rmap->mapping->indices[i] + 1; 237 } 238 } 239 if (nt != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D n %D",nt,n); 240 ierr = PetscMalloc1(n+1,&auglyrmapd);CHKERRQ(ierr); 241 for (i=0; i<inA->rmap->mapping->n; i++) { 242 if (r_rmapd[i]) { 243 auglyrmapd[(r_rmapd[i]-1)-cstart] = i; 244 } 245 } 246 ierr = PetscFree(r_rmapd);CHKERRQ(ierr); 247 ierr = VecCreateSeq(PETSC_COMM_SELF,n,&auglydd);CHKERRQ(ierr); 248 249 ierr = PetscCalloc1(inA->cmap->N+1,&lindices);CHKERRQ(ierr); 250 for (i=0; i<ina->B->cmap->n; i++) { 251 lindices[garray[i]] = i+1; 252 } 253 no = inA->rmap->mapping->n - nt; 254 ierr = PetscCalloc1(inA->rmap->mapping->n+1,&r_rmapo);CHKERRQ(ierr); 255 nt = 0; 256 for (i=0; i<inA->rmap->mapping->n; i++) { 257 if (lindices[inA->rmap->mapping->indices[i]]) { 258 nt++; 259 r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]]; 260 } 261 } 262 if (nt > no) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,n); 263 ierr = PetscFree(lindices);CHKERRQ(ierr); 264 ierr = PetscMalloc1(nt+1,&auglyrmapo);CHKERRQ(ierr); 265 for (i=0; i<inA->rmap->mapping->n; i++) { 266 if (r_rmapo[i]) { 267 auglyrmapo[(r_rmapo[i]-1)] = i; 268 } 269 } 270 ierr = PetscFree(r_rmapo);CHKERRQ(ierr); 271 ierr = VecCreateSeq(PETSC_COMM_SELF,nt,&auglyoo);CHKERRQ(ierr); 272 PetscFunctionReturn(0); 273 } 274 275 PetscErrorCode MatMPIAIJDiagonalScaleLocal(Mat A,Vec scale) 276 { 277 /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */ 278 PetscErrorCode ierr; 279 280 PetscFunctionBegin; 281 ierr = PetscTryMethod(A,"MatDiagonalScaleLocal_C",(Mat,Vec),(A,scale));CHKERRQ(ierr); 282 PetscFunctionReturn(0); 283 } 284 285 PetscErrorCode MatDiagonalScaleLocal_MPIAIJ(Mat A,Vec scale) 286 { 287 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; /*access private part of matrix */ 288 PetscErrorCode ierr; 289 PetscInt n,i; 290 PetscScalar *d,*o; 291 const PetscScalar *s; 292 293 PetscFunctionBegin; 294 if (!auglyrmapd) { 295 ierr = MatMPIAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr); 296 } 297 298 ierr = VecGetArrayRead(scale,&s);CHKERRQ(ierr); 299 300 ierr = VecGetLocalSize(auglydd,&n);CHKERRQ(ierr); 301 ierr = VecGetArray(auglydd,&d);CHKERRQ(ierr); 302 for (i=0; i<n; i++) { 303 d[i] = s[auglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ 304 } 305 ierr = VecRestoreArray(auglydd,&d);CHKERRQ(ierr); 306 /* column scale "diagonal" portion of local matrix */ 307 ierr = MatDiagonalScale(a->A,NULL,auglydd);CHKERRQ(ierr); 308 309 ierr = VecGetLocalSize(auglyoo,&n);CHKERRQ(ierr); 310 ierr = VecGetArray(auglyoo,&o);CHKERRQ(ierr); 311 for (i=0; i<n; i++) { 312 o[i] = s[auglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ 313 } 314 ierr = VecRestoreArrayRead(scale,&s);CHKERRQ(ierr); 315 ierr = VecRestoreArray(auglyoo,&o);CHKERRQ(ierr); 316 /* column scale "off-diagonal" portion of local matrix */ 317 ierr = MatDiagonalScale(a->B,NULL,auglyoo);CHKERRQ(ierr); 318 PetscFunctionReturn(0); 319 } 320