1 2 /* 3 Support for the parallel AIJ matrix vector multiply 4 */ 5 #include <../src/mat/impls/aij/mpi/mpiaij.h> 6 #include <petsc/private/isimpl.h> /* needed because accesses data structure of ISLocalToGlobalMapping directly */ 7 8 PetscErrorCode MatSetUpMultiply_MPIAIJ(Mat mat) 9 { 10 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 11 Mat_SeqAIJ *B = (Mat_SeqAIJ*)(aij->B->data); 12 PetscErrorCode ierr; 13 PetscInt i,j,*aj = B->j,ec = 0,*garray; 14 IS from,to; 15 Vec gvec; 16 #if defined(PETSC_USE_CTABLE) 17 PetscTable gid1_lid1; 18 PetscTablePosition tpos; 19 PetscInt gid,lid; 20 #else 21 PetscInt N = mat->cmap->N,*indices; 22 #endif 23 24 PetscFunctionBegin; 25 if (!aij->garray) { 26 #if defined(PETSC_USE_CTABLE) 27 /* use a table */ 28 ierr = PetscTableCreate(aij->B->rmap->n,mat->cmap->N+1,&gid1_lid1);CHKERRQ(ierr); 29 for (i=0; i<aij->B->rmap->n; i++) { 30 for (j=0; j<B->ilen[i]; j++) { 31 PetscInt data,gid1 = aj[B->i[i] + j] + 1; 32 ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr); 33 if (!data) { 34 /* one based table */ 35 ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr); 36 } 37 } 38 } 39 /* form array of columns we need */ 40 ierr = PetscMalloc1(ec+1,&garray);CHKERRQ(ierr); 41 ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 42 while (tpos) { 43 ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 44 gid--; 45 lid--; 46 garray[lid] = gid; 47 } 48 ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); /* sort, and rebuild */ 49 ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 50 for (i=0; i<ec; i++) { 51 ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 52 } 53 /* compact out the extra columns in B */ 54 for (i=0; i<aij->B->rmap->n; i++) { 55 for (j=0; j<B->ilen[i]; j++) { 56 PetscInt gid1 = aj[B->i[i] + j] + 1; 57 ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 58 lid--; 59 aj[B->i[i] + j] = lid; 60 } 61 } 62 aij->B->cmap->n = aij->B->cmap->N = ec; 63 aij->B->cmap->bs = 1; 64 65 ierr = PetscLayoutSetUp((aij->B->cmap));CHKERRQ(ierr); 66 ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr); 67 #else 68 /* Make an array as long as the number of columns */ 69 /* mark those columns that are in aij->B */ 70 ierr = PetscCalloc1(N+1,&indices);CHKERRQ(ierr); 71 for (i=0; i<aij->B->rmap->n; i++) { 72 for (j=0; j<B->ilen[i]; j++) { 73 if (!indices[aj[B->i[i] + j]]) ec++; 74 indices[aj[B->i[i] + j]] = 1; 75 } 76 } 77 78 /* form array of columns we need */ 79 ierr = PetscMalloc1(ec+1,&garray);CHKERRQ(ierr); 80 ec = 0; 81 for (i=0; i<N; i++) { 82 if (indices[i]) garray[ec++] = i; 83 } 84 85 /* make indices now point into garray */ 86 for (i=0; i<ec; i++) { 87 indices[garray[i]] = i; 88 } 89 90 /* compact out the extra columns in B */ 91 for (i=0; i<aij->B->rmap->n; i++) { 92 for (j=0; j<B->ilen[i]; j++) { 93 aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 94 } 95 } 96 aij->B->cmap->n = aij->B->cmap->N = ec; 97 aij->B->cmap->bs = 1; 98 99 ierr = PetscLayoutSetUp((aij->B->cmap));CHKERRQ(ierr); 100 ierr = PetscFree(indices);CHKERRQ(ierr); 101 #endif 102 } else { 103 garray = aij->garray; 104 } 105 106 if (!aij->lvec) { 107 /* create local vector that is used to scatter into */ 108 ierr = VecCreateSeq(PETSC_COMM_SELF,ec,&aij->lvec);CHKERRQ(ierr); 109 } else { 110 ierr = VecGetSize(aij->lvec,&ec);CHKERRQ(ierr); 111 } 112 113 /* create two temporary Index sets for build scatter gather */ 114 ierr = ISCreateGeneral(((PetscObject)mat)->comm,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 115 116 ierr = ISCreateStride(PETSC_COMM_SELF,ec,0,1,&to);CHKERRQ(ierr); 117 118 /* create temporary global vector to generate scatter context */ 119 /* This does not allocate the array's memory so is efficient */ 120 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);CHKERRQ(ierr); 121 122 /* generate the scatter context */ 123 if (aij->Mvctx_mpi1_flg) { 124 ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr); 125 ierr = VecScatterCreateMPI1(gvec,from,aij->lvec,to,&aij->Mvctx_mpi1);CHKERRQ(ierr); 126 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)aij->Mvctx_mpi1);CHKERRQ(ierr); 127 } else { 128 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 129 ierr = VecScatterCreate(gvec,from,aij->lvec,to,&aij->Mvctx);CHKERRQ(ierr); 130 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)aij->Mvctx);CHKERRQ(ierr); 131 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)aij->lvec);CHKERRQ(ierr); 132 ierr = PetscLogObjectMemory((PetscObject)mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr); 133 } 134 aij->garray = garray; 135 136 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr); 137 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr); 138 139 ierr = ISDestroy(&from);CHKERRQ(ierr); 140 ierr = ISDestroy(&to);CHKERRQ(ierr); 141 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 142 PetscFunctionReturn(0); 143 } 144 145 /* 146 Takes the local part of an already assembled MPIAIJ matrix 147 and disassembles it. This is to allow new nonzeros into the matrix 148 that require more communication in the matrix vector multiply. 149 Thus certain data-structures must be rebuilt. 150 151 Kind of slow! But that's what application programmers get when 152 they are sloppy. 153 */ 154 PetscErrorCode MatDisAssemble_MPIAIJ(Mat A) 155 { 156 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 157 Mat B = aij->B,Bnew; 158 Mat_SeqAIJ *Baij = (Mat_SeqAIJ*)B->data; 159 PetscErrorCode ierr; 160 PetscInt i,j,m = B->rmap->n,n = A->cmap->N,col,ct = 0,*garray = aij->garray,*nz,ec; 161 PetscScalar v; 162 163 PetscFunctionBegin; 164 /* free stuff related to matrix-vec multiply */ 165 ierr = VecGetSize(aij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 166 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 167 if (aij->colmap) { 168 #if defined(PETSC_USE_CTABLE) 169 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 170 #else 171 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 172 ierr = PetscLogObjectMemory((PetscObject)A,-aij->B->cmap->n*sizeof(PetscInt));CHKERRQ(ierr); 173 #endif 174 } 175 176 /* make sure that B is assembled so we can access its values */ 177 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 178 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 179 180 /* invent new B and copy stuff over */ 181 ierr = PetscMalloc1(m+1,&nz);CHKERRQ(ierr); 182 for (i=0; i<m; i++) { 183 nz[i] = Baij->i[i+1] - Baij->i[i]; 184 } 185 ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr); 186 ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr); 187 ierr = MatSetBlockSizesFromMats(Bnew,A,A);CHKERRQ(ierr); 188 ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr); 189 ierr = MatSeqAIJSetPreallocation(Bnew,0,nz);CHKERRQ(ierr); 190 191 ((Mat_SeqAIJ*)Bnew->data)->nonew = Baij->nonew; /* Inherit insertion error options. */ 192 /* 193 Ensure that B's nonzerostate is monotonically increasing. 194 Or should this follow the MatSetValues() loop to preserve B's nonzerstate across a MatDisAssemble() call? 195 */ 196 Bnew->nonzerostate = B->nonzerostate; 197 198 ierr = PetscFree(nz);CHKERRQ(ierr); 199 for (i=0; i<m; i++) { 200 for (j=Baij->i[i]; j<Baij->i[i+1]; j++) { 201 col = garray[Baij->j[ct]]; 202 v = Baij->a[ct++]; 203 ierr = MatSetValues(Bnew,1,&i,1,&col,&v,B->insertmode);CHKERRQ(ierr); 204 } 205 } 206 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 207 ierr = PetscLogObjectMemory((PetscObject)A,-ec*sizeof(PetscInt));CHKERRQ(ierr); 208 ierr = MatDestroy(&B);CHKERRQ(ierr); 209 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)Bnew);CHKERRQ(ierr); 210 211 aij->B = Bnew; 212 A->was_assembled = PETSC_FALSE; 213 PetscFunctionReturn(0); 214 } 215 216 /* ugly stuff added for Glenn someday we should fix this up */ 217 218 static PetscInt *auglyrmapd = 0,*auglyrmapo = 0; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */ 219 static Vec auglydd = 0,auglyoo = 0; /* work vectors used to scale the two parts of the local matrix */ 220 221 222 PetscErrorCode MatMPIAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale) 223 { 224 Mat_MPIAIJ *ina = (Mat_MPIAIJ*) inA->data; /*access private part of matrix */ 225 PetscErrorCode ierr; 226 PetscInt i,n,nt,cstart,cend,no,*garray = ina->garray,*lindices; 227 PetscInt *r_rmapd,*r_rmapo; 228 229 PetscFunctionBegin; 230 ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr); 231 ierr = MatGetSize(ina->A,NULL,&n);CHKERRQ(ierr); 232 ierr = PetscCalloc1(inA->rmap->mapping->n+1,&r_rmapd);CHKERRQ(ierr); 233 nt = 0; 234 for (i=0; i<inA->rmap->mapping->n; i++) { 235 if (inA->rmap->mapping->indices[i] >= cstart && inA->rmap->mapping->indices[i] < cend) { 236 nt++; 237 r_rmapd[i] = inA->rmap->mapping->indices[i] + 1; 238 } 239 } 240 if (nt != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D n %D",nt,n); 241 ierr = PetscMalloc1(n+1,&auglyrmapd);CHKERRQ(ierr); 242 for (i=0; i<inA->rmap->mapping->n; i++) { 243 if (r_rmapd[i]) { 244 auglyrmapd[(r_rmapd[i]-1)-cstart] = i; 245 } 246 } 247 ierr = PetscFree(r_rmapd);CHKERRQ(ierr); 248 ierr = VecCreateSeq(PETSC_COMM_SELF,n,&auglydd);CHKERRQ(ierr); 249 250 ierr = PetscCalloc1(inA->cmap->N+1,&lindices);CHKERRQ(ierr); 251 for (i=0; i<ina->B->cmap->n; i++) { 252 lindices[garray[i]] = i+1; 253 } 254 no = inA->rmap->mapping->n - nt; 255 ierr = PetscCalloc1(inA->rmap->mapping->n+1,&r_rmapo);CHKERRQ(ierr); 256 nt = 0; 257 for (i=0; i<inA->rmap->mapping->n; i++) { 258 if (lindices[inA->rmap->mapping->indices[i]]) { 259 nt++; 260 r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]]; 261 } 262 } 263 if (nt > no) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,n); 264 ierr = PetscFree(lindices);CHKERRQ(ierr); 265 ierr = PetscMalloc1(nt+1,&auglyrmapo);CHKERRQ(ierr); 266 for (i=0; i<inA->rmap->mapping->n; i++) { 267 if (r_rmapo[i]) { 268 auglyrmapo[(r_rmapo[i]-1)] = i; 269 } 270 } 271 ierr = PetscFree(r_rmapo);CHKERRQ(ierr); 272 ierr = VecCreateSeq(PETSC_COMM_SELF,nt,&auglyoo);CHKERRQ(ierr); 273 PetscFunctionReturn(0); 274 } 275 276 PetscErrorCode MatMPIAIJDiagonalScaleLocal(Mat A,Vec scale) 277 { 278 /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */ 279 PetscErrorCode ierr; 280 281 PetscFunctionBegin; 282 ierr = PetscTryMethod(A,"MatDiagonalScaleLocal_C",(Mat,Vec),(A,scale));CHKERRQ(ierr); 283 PetscFunctionReturn(0); 284 } 285 286 PetscErrorCode MatDiagonalScaleLocal_MPIAIJ(Mat A,Vec scale) 287 { 288 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; /*access private part of matrix */ 289 PetscErrorCode ierr; 290 PetscInt n,i; 291 PetscScalar *d,*o; 292 const PetscScalar *s; 293 294 PetscFunctionBegin; 295 if (!auglyrmapd) { 296 ierr = MatMPIAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr); 297 } 298 299 ierr = VecGetArrayRead(scale,&s);CHKERRQ(ierr); 300 301 ierr = VecGetLocalSize(auglydd,&n);CHKERRQ(ierr); 302 ierr = VecGetArray(auglydd,&d);CHKERRQ(ierr); 303 for (i=0; i<n; i++) { 304 d[i] = s[auglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ 305 } 306 ierr = VecRestoreArray(auglydd,&d);CHKERRQ(ierr); 307 /* column scale "diagonal" portion of local matrix */ 308 ierr = MatDiagonalScale(a->A,NULL,auglydd);CHKERRQ(ierr); 309 310 ierr = VecGetLocalSize(auglyoo,&n);CHKERRQ(ierr); 311 ierr = VecGetArray(auglyoo,&o);CHKERRQ(ierr); 312 for (i=0; i<n; i++) { 313 o[i] = s[auglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ 314 } 315 ierr = VecRestoreArrayRead(scale,&s);CHKERRQ(ierr); 316 ierr = VecRestoreArray(auglyoo,&o);CHKERRQ(ierr); 317 /* column scale "off-diagonal" portion of local matrix */ 318 ierr = MatDiagonalScale(a->B,NULL,auglyoo);CHKERRQ(ierr); 319 PetscFunctionReturn(0); 320 } 321