/*
   Support for the parallel AIJ matrix-vector multiply
*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petsc/private/vecimpl.h>
#include <petsc/private/isimpl.h> /* needed because this file accesses the data structure of ISLocalToGlobalMapping directly */

PetscErrorCode MatSetUpMultiply_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)(aij->B->data);
  PetscErrorCode ierr;
  PetscInt       i,j,*aj = B->j,ec = 0,*garray;
  IS             from,to;
  Vec            gvec;
#if defined(PETSC_USE_CTABLE)
  PetscTable         gid1_lid1;
  PetscTablePosition tpos;
  PetscInt           gid,lid;
#else
  PetscInt N = mat->cmap->N,*indices;
#endif

  PetscFunctionBegin;
  if (!aij->garray) {
#if defined(PETSC_USE_CTABLE)
    /* use a table to map global column ids to local ones */
    ierr = PetscTableCreate(aij->B->rmap->n,mat->cmap->N+1,&gid1_lid1);CHKERRQ(ierr);
    for (i=0; i<aij->B->rmap->n; i++) {
      for (j=0; j<B->ilen[i]; j++) {
        PetscInt data,gid1 = aj[B->i[i] + j] + 1;
        ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
        if (!data) {
          /* one-based table */
          ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr);
        }
      }
    }
    /* form array of columns we need */
    ierr = PetscMalloc1(ec+1,&garray);CHKERRQ(ierr);
    ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
    while (tpos) {
      ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
      gid--;
      lid--;
      garray[lid] = gid;
    }
    ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); /* sort, and rebuild */
    ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
    for (i=0; i<ec; i++) {
      ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
    }
    /* compact out the extra columns in B */
    for (i=0; i<aij->B->rmap->n; i++) {
      for (j=0; j<B->ilen[i]; j++) {
        PetscInt gid1 = aj[B->i[i] + j] + 1;
        ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
        lid--;
        aj[B->i[i] + j] = lid;
      }
    }
    aij->B->cmap->n  = aij->B->cmap->N = ec;
    aij->B->cmap->bs = 1;

    ierr = PetscLayoutSetUp((aij->B->cmap));CHKERRQ(ierr);
    ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr);
#else
    /* make an array as long as the number of columns */
    /* mark those columns that are in aij->B */
    ierr = PetscCalloc1(N+1,&indices);CHKERRQ(ierr);
    for (i=0; i<aij->B->rmap->n; i++) {
      for (j=0; j<B->ilen[i]; j++) {
        if (!indices[aj[B->i[i] + j]]) ec++;
        indices[aj[B->i[i] + j]] = 1;
      }
    }

    /* form array of columns we need */
    ierr = PetscMalloc1(ec+1,&garray);CHKERRQ(ierr);
    ec   = 0;
    for (i=0; i<N; i++) {
      if (indices[i]) garray[ec++] = i;
    }

    /* make indices now point into garray */
    for (i=0; i<ec; i++) {
      indices[garray[i]] = i;
    }

    /* compact out the extra columns in B */
    for (i=0; i<aij->B->rmap->n; i++) {
      for (j=0; j<B->ilen[i]; j++) {
        aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
      }
    }
    aij->B->cmap->n  = aij->B->cmap->N = ec;
    aij->B->cmap->bs = 1;

    ierr = PetscLayoutSetUp((aij->B->cmap));CHKERRQ(ierr);
    ierr = PetscFree(indices);CHKERRQ(ierr);
#endif
  } else {
    garray = aij->garray;
  }
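
  /*
     Worked example (illustrative only, not executed): if this rank's
     off-diagonal block B references global columns 3, 7, and 9, the code
     above produces ec = 3 and garray = {3,7,9}, and the column indices
     stored in B->j are compacted to the local values {0,1,2}; garray maps
     a compacted local column back to its global id.
  */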

  if (!aij->lvec) {
    /* create local vector that is used to scatter into */
    ierr = VecCreateSeq(PETSC_COMM_SELF,ec,&aij->lvec);CHKERRQ(ierr);
  } else {
    ierr = VecGetSize(aij->lvec,&ec);CHKERRQ(ierr);
  }

  /* create two temporary index sets used to build the scatter-gather */
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
  ierr = ISCreateStride(PETSC_COMM_SELF,ec,0,1,&to);CHKERRQ(ierr);

  /* create temporary global vector to generate scatter context */
  /* this does not allocate the array's memory, so it is efficient */
  ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);CHKERRQ(ierr);

  /* generate the scatter context */
  if (aij->Mvctx_mpi1_flg) {
    ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);
    ierr = VecScatterCreate(gvec,from,aij->lvec,to,&aij->Mvctx_mpi1);CHKERRQ(ierr);
    ierr = VecScatterSetType(aij->Mvctx_mpi1,VECSCATTERMPI1);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)aij->Mvctx_mpi1);CHKERRQ(ierr);
  } else {
    ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
    ierr = VecScatterCreate(gvec,from,aij->lvec,to,&aij->Mvctx);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)aij->Mvctx);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)aij->lvec);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
  }
  aij->garray = garray;

  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr);

  ierr = ISDestroy(&from);CHKERRQ(ierr);
  ierr = ISDestroy(&to);CHKERRQ(ierr);
  ierr = VecDestroy(&gvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
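
/*
   How the scatter built above is used: a minimal sketch, paraphrasing the
   companion MatMult implementation in mpiaij.c (see that file for the
   definitive version).  The ghost values of the input vector are gathered
   into aij->lvec while the "diagonal" block multiplies the locally owned
   part; the "off-diagonal" block then adds its contribution:

     ierr = VecScatterBegin(aij->Mvctx,xx,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
     ierr = (*aij->A->ops->mult)(aij->A,xx,yy);CHKERRQ(ierr);
     ierr = VecScatterEnd(aij->Mvctx,xx,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
     ierr = (*aij->B->ops->multadd)(aij->B,aij->lvec,yy,yy);CHKERRQ(ierr);
*/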

/*
   Takes the local part of an already assembled MPIAIJ matrix
   and disassembles it. This is to allow new nonzeros into the matrix
   that require more communication in the matrix-vector multiply.
   Thus certain data structures must be rebuilt.

   Kind of slow! But that's what application programmers get when
   they are sloppy.
*/
PetscErrorCode MatDisAssemble_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)A->data;
  Mat            B     = aij->B,Bnew;
  Mat_SeqAIJ     *Baij = (Mat_SeqAIJ*)B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,m = B->rmap->n,n = A->cmap->N,col,ct = 0,*garray = aij->garray,*nz,ec;
  PetscScalar    v;

  PetscFunctionBegin;
  /* free stuff related to matrix-vector multiply */
  ierr = VecGetSize(aij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  if (aij->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
    ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)A,-aij->B->cmap->n*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  }

  /* make sure that B is assembled so we can access its values */
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* invent new B and copy stuff over */
  ierr = PetscMalloc1(m+1,&nz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    nz[i] = Baij->i[i+1] - Baij->i[i];
  }
  ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr);
  ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(Bnew,A,A);CHKERRQ(ierr);
  ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(Bnew,0,nz);CHKERRQ(ierr);

  ((Mat_SeqAIJ*)Bnew->data)->nonew = Baij->nonew; /* inherit insertion error options */

  /*
   Ensure that B's nonzerostate is monotonically increasing.
   Or should this follow the MatSetValues() loop to preserve B's nonzerostate across a MatDisAssemble() call?
  */
  Bnew->nonzerostate = B->nonzerostate;

  ierr = PetscFree(nz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    for (j=Baij->i[i]; j<Baij->i[i+1]; j++) {
      col  = garray[Baij->j[ct]]; /* restore the global column index */
      v    = Baij->a[ct++];
      ierr = MatSetValues(Bnew,1,&i,1,&col,&v,B->insertmode);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)Bnew);CHKERRQ(ierr);

  aij->B           = Bnew;
  A->was_assembled = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* ugly stuff added for Glenn; someday we should fix this up */

static PetscInt *auglyrmapd = NULL,*auglyrmapo = NULL; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */
static Vec      auglydd = NULL,auglyoo = NULL;         /* work vectors used to scale the two parts of the local matrix */

PetscErrorCode MatMPIAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale)
{
  Mat_MPIAIJ     *ina = (Mat_MPIAIJ*)inA->data; /* access private part of matrix */
  PetscErrorCode ierr;
  PetscInt       i,n,nt,cstart,cend,no,*garray = ina->garray,*lindices;
  PetscInt       *r_rmapd,*r_rmapo;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr);
  ierr = MatGetSize(ina->A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(inA->rmap->mapping->n+1,&r_rmapd);CHKERRQ(ierr);
  nt   = 0;
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (inA->rmap->mapping->indices[i] >= cstart && inA->rmap->mapping->indices[i] < cend) {
      nt++;
      r_rmapd[i] = inA->rmap->mapping->indices[i] + 1;
    }
  }
  if (nt != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D n %D",nt,n);
  ierr = PetscMalloc1(n+1,&auglyrmapd);CHKERRQ(ierr);
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (r_rmapd[i]) {
      auglyrmapd[(r_rmapd[i]-1)-cstart] = i;
    }
  }
  ierr = PetscFree(r_rmapd);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF,n,&auglydd);CHKERRQ(ierr);

  /* mark the global columns present in the off-diagonal block; values are one-based local indices */
  ierr = PetscCalloc1(inA->cmap->N+1,&lindices);CHKERRQ(ierr);
  for (i=0; i<ina->B->cmap->n; i++) {
    lindices[garray[i]] = i+1;
  }
  no   = inA->rmap->mapping->n - nt;
  ierr = PetscCalloc1(inA->rmap->mapping->n+1,&r_rmapo);CHKERRQ(ierr);
  nt   = 0;
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (lindices[inA->rmap->mapping->indices[i]]) {
      nt++;
      r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]];
    }
  }
  if (nt > no) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,no);
  ierr = PetscFree(lindices);CHKERRQ(ierr);
  ierr = PetscMalloc1(nt+1,&auglyrmapo);CHKERRQ(ierr);
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (r_rmapo[i]) {
      auglyrmapo[(r_rmapo[i]-1)] = i;
    }
  }
  ierr = PetscFree(r_rmapo);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF,nt,&auglyoo);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMPIAIJDiagonalScaleLocal(Mat A,Vec scale)
{
  /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(A,"MatDiagonalScaleLocal_C",(Mat,Vec),(A,scale));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
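
/*
   A minimal usage sketch (caller code is assumed here, not part of this file):
   given an MPIAIJ matrix A with a local-to-global mapping already set, build a
   sequential vector with one entry per local (mapped) index and scale the
   columns of both local blocks by it.

     Vec scale;
     ierr = VecCreateSeq(PETSC_COMM_SELF,nlocal,&scale);CHKERRQ(ierr);  // nlocal = size of the local ordering
     // ... fill scale with per-local-index factors ...
     ierr = MatMPIAIJDiagonalScaleLocal(A,scale);CHKERRQ(ierr);
     ierr = VecDestroy(&scale);CHKERRQ(ierr);
*/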
PetscErrorCode MatDiagonalScaleLocal_MPIAIJ(Mat A,Vec scale)
{
  Mat_MPIAIJ        *a = (Mat_MPIAIJ*)A->data; /* access private part of matrix */
  PetscErrorCode    ierr;
  PetscInt          n,i;
  PetscScalar       *d,*o;
  const PetscScalar *s;

  PetscFunctionBegin;
  if (!auglyrmapd) {
    ierr = MatMPIAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr);
  }

  ierr = VecGetArrayRead(scale,&s);CHKERRQ(ierr);

  ierr = VecGetLocalSize(auglydd,&n);CHKERRQ(ierr);
  ierr = VecGetArray(auglydd,&d);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    d[i] = s[auglyrmapd[i]]; /* copy "diagonal" (truly local) portion of scale into dd vector */
  }
  ierr = VecRestoreArray(auglydd,&d);CHKERRQ(ierr);
  /* column scale "diagonal" portion of local matrix */
  ierr = MatDiagonalScale(a->A,NULL,auglydd);CHKERRQ(ierr);

  ierr = VecGetLocalSize(auglyoo,&n);CHKERRQ(ierr);
  ierr = VecGetArray(auglyoo,&o);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    o[i] = s[auglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
  }
  ierr = VecRestoreArrayRead(scale,&s);CHKERRQ(ierr);
  ierr = VecRestoreArray(auglyoo,&o);CHKERRQ(ierr);
  /* column scale "off-diagonal" portion of local matrix */
  ierr = MatDiagonalScale(a->B,NULL,auglyoo);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
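
/*
   Net effect of the routine above, written out (the notation is illustrative,
   not from PETSc): with A_d the "diagonal" block and B the "off-diagonal"
   block, each column is multiplied by the scale entry of the local (mapped)
   index that owns it:

     column i of A_d  *=  s[auglyrmapd[i]]
     column i of B    *=  s[auglyrmapo[i]]
*/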