/*
   Support for the parallel AIJ matrix-vector multiply
*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petsc-private/isimpl.h>    /* needed because this file accesses the ISLocalToGlobalMapping data structure directly */

#undef __FUNCT__
#define __FUNCT__ "MatSetUpMultiply_MPIAIJ"
PetscErrorCode MatSetUpMultiply_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ         *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ         *B   = (Mat_SeqAIJ*)(aij->B->data);
  PetscErrorCode     ierr;
  PetscInt           i,j,*aj = B->j,ec = 0,*garray;
  IS                 from,to;
  Vec                gvec;
  PetscBool          useblockis;
#if defined(PETSC_USE_CTABLE)
  PetscTable         gid1_lid1;
  PetscTablePosition tpos;
  PetscInt           gid,lid;
#else
  PetscInt           N = mat->cmap->N,*indices;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  /* use a table to locate the off-process columns referenced by B */
  ierr = PetscTableCreate(aij->B->rmap->n,mat->cmap->N+1,&gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<aij->B->rmap->n; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt data,gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
      if (!data) {
        /* the table is one-based */
        ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr);
      }
    }
  }
  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
  while (tpos) {
    ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
    gid--; /* table entries are one-based; convert back to zero-based */
    lid--;
    garray[lid] = gid;
  }
  ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); /* sort, then rebuild the table with the sorted local ordering */
  ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
  /* compact out the extra columns in B */
  for (i=0; i<aij->B->rmap->n; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
      lid--;
      aj[B->i[i] + j] = lid;
    }
  }
  aij->B->cmap->n = aij->B->cmap->N = ec;

  ierr = PetscLayoutSetUp((aij->B->cmap));CHKERRQ(ierr);
  ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr);
#else
  /* make an array as long as the number of columns and
     mark those columns that are in aij->B */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<aij->B->rmap->n; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ec   = 0;
  for (i=0; i<N; i++) {
    if (indices[i]) garray[ec++] = i;
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) {
    indices[garray[i]] = i;
  }

  /* compact out the extra columns in B */
  for (i=0; i<aij->B->rmap->n; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
    }
  }
  aij->B->cmap->n = aij->B->cmap->N = ec;

  ierr = PetscLayoutSetUp((aij->B->cmap));CHKERRQ(ierr);
  ierr = PetscFree(indices);CHKERRQ(ierr);
#endif
  /* create the local work vector that off-process values are scattered into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec,&aij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets used to build the scatter-gather context;
     check for the special case where whole blocks are communicated, which allows a faster VecScatter */
  useblockis = PETSC_FALSE;
  if (mat->cmap->bs > 1) {
    PetscInt bs = mat->cmap->bs,ibs,ga;
    if (!(ec % bs)) {
      useblockis = PETSC_TRUE;
      for (i=0; i<ec/bs; i++) {
        if ((ga = garray[ibs = i*bs]) % bs) {
          useblockis = PETSC_FALSE;
          break;
        }
        for (j=1; j<bs; j++) {
          if (garray[ibs+j] != ga+j) {
            useblockis = PETSC_FALSE;
            break;
          }
        }
        if (!useblockis) break;
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  /* verify that all processes made the same blocked/non-blocked choice */
  i    = (PetscInt)useblockis;
  ierr = MPI_Allreduce(&i,&j,1,MPIU_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (j!=i) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Use of block index sets is not consistent across processes (this process is using blocked)");
#endif

  if (useblockis) {
    PetscInt *ga,bs = mat->cmap->bs,iec = ec/bs;
    if (ec%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"ec=%D is not divisible by bs=%D",ec,bs);
    ierr = PetscInfo(mat,"Using block index set to define scatter\n");CHKERRQ(ierr);
    ierr = PetscMalloc(iec*sizeof(PetscInt),&ga);CHKERRQ(ierr);
    for (i=0; i<iec; i++) ga[i] = garray[i*bs]/bs;
    ierr = ISCreateBlock(PetscObjectComm((PetscObject)mat),bs,iec,ga,PETSC_OWN_POINTER,&from);CHKERRQ(ierr);
  } else {
    ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
  }

  ierr = ISCreateStride(PETSC_COMM_SELF,ec,0,1,&to);CHKERRQ(ierr);

  /* create a temporary global vector to generate the scatter context;
     this does not allocate the array's memory, so it is efficient */
  ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);CHKERRQ(ierr);

  /* generate the scatter context */
  ierr = VecScatterCreate(gvec,from,aij->lvec,to,&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,aij->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);

  aij->garray = garray;

  ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = ISDestroy(&from);CHKERRQ(ierr);
  ierr = ISDestroy(&to);CHKERRQ(ierr);
  ierr = VecDestroy(&gvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
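/*
   A worked mini-example of the compaction performed above (hypothetical
   numbers, for illustration only): suppose the off-diagonal block B on this
   process references global columns {3,7,12}.  Then ec = 3,
   garray = {3,7,12}, B->cmap->N is reset to 3, and every column index stored
   in B->j is rewritten to its compacted local value in {0,1,2}.  The scatter
   aij->Mvctx then gathers exactly the entries garray[0..ec-1] of the parallel
   vector into the length-ec work vector aij->lvec for the multiply.
*/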
#undef __FUNCT__
#define __FUNCT__ "MatDisAssemble_MPIAIJ"
/*
   Takes the local part of an already assembled MPIAIJ matrix
   and disassembles it. This is to allow new nonzeros into the matrix
   that require more communication in the matrix-vector multiply.
   Thus certain data structures must be rebuilt.

   Kind of slow! But that's what application programmers get when
   they are sloppy.
*/
PetscErrorCode MatDisAssemble_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)A->data;
  Mat            B     = aij->B,Bnew;
  Mat_SeqAIJ     *Baij = (Mat_SeqAIJ*)B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,m = B->rmap->n,n = A->cmap->N,col,ct = 0,*garray = aij->garray,*nz,ec;
  PetscScalar    v;

  PetscFunctionBegin;
  /* free stuff related to the matrix-vector multiply */
  ierr = VecGetSize(aij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  if (aij->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
    ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(A,-aij->B->cmap->n*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  }

  /* make sure that B is assembled so we can access its values */
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* invent new B and copy stuff over */
  ierr = PetscMalloc((m+1)*sizeof(PetscInt),&nz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    nz[i] = Baij->i[i+1] - Baij->i[i];
  }
  ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr);
  ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr);
  ierr = MatSetBlockSizes(Bnew,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
  ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(Bnew,0,nz);CHKERRQ(ierr);

  ((Mat_SeqAIJ*)Bnew->data)->nonew = Baij->nonew; /* inherit insertion error options */

  ierr = PetscFree(nz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    for (j=Baij->i[i]; j<Baij->i[i+1]; j++) {
      col  = garray[Baij->j[ct]]; /* map the compacted local column index back to its global column */
      v    = Baij->a[ct++];
      ierr = MatSetValues(Bnew,1,&i,1,&col,&v,B->insertmode);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr);

  aij->B           = Bnew;
  A->was_assembled = PETSC_FALSE;
  PetscFunctionReturn(0);
}
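/*
   Sketch of the inverse mapping used above (same hypothetical numbers as the
   example after MatSetUpMultiply_MPIAIJ): with garray = {3,7,12}, an entry of
   B stored at compacted column 1 is reinserted into Bnew at global column
   garray[1] = 7.  Because Bnew's column dimension is restored to A->cmap->N,
   later MatSetValues() calls may introduce columns that were not previously
   communicated; MatSetUpMultiply_MPIAIJ() then rebuilds garray, lvec, and
   Mvctx when the matrix is next assembled.
*/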
/* ugly stuff added for Glenn; someday we should fix this up */

static PetscInt *auglyrmapd = 0,*auglyrmapo = 0; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */
static Vec      auglydd     = 0,auglyoo     = 0; /* work vectors used to scale the two parts of the local matrix */

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJDiagonalScaleLocalSetUp"
PetscErrorCode MatMPIAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale)
{
  Mat_MPIAIJ     *ina = (Mat_MPIAIJ*)inA->data; /* access the private part of the matrix */
  PetscErrorCode ierr;
  PetscInt       i,n,nt,cstart,cend,no,*garray = ina->garray,*lindices;
  PetscInt       *r_rmapd,*r_rmapo;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr);
  ierr = MatGetSize(ina->A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc((inA->rmap->mapping->n+1)*sizeof(PetscInt),&r_rmapd);CHKERRQ(ierr);
  ierr = PetscMemzero(r_rmapd,inA->rmap->mapping->n*sizeof(PetscInt));CHKERRQ(ierr);
  nt   = 0;
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (inA->rmap->mapping->indices[i] >= cstart && inA->rmap->mapping->indices[i] < cend) {
      nt++;
      r_rmapd[i] = inA->rmap->mapping->indices[i] + 1; /* one-based so that zero means "not owned locally" */
    }
  }
  if (nt != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D n %D",nt,n);
  ierr = PetscMalloc((n+1)*sizeof(PetscInt),&auglyrmapd);CHKERRQ(ierr);
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (r_rmapd[i]) {
      auglyrmapd[(r_rmapd[i]-1)-cstart] = i;
    }
  }
  ierr = PetscFree(r_rmapd);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF,n,&auglydd);CHKERRQ(ierr);

  ierr = PetscMalloc((inA->cmap->N+1)*sizeof(PetscInt),&lindices);CHKERRQ(ierr);
  ierr = PetscMemzero(lindices,inA->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<ina->B->cmap->n; i++) {
    lindices[garray[i]] = i+1; /* one-based local index of each off-diagonal global column */
  }
  no   = inA->rmap->mapping->n - nt;
  ierr = PetscMalloc((inA->rmap->mapping->n+1)*sizeof(PetscInt),&r_rmapo);CHKERRQ(ierr);
  ierr = PetscMemzero(r_rmapo,inA->rmap->mapping->n*sizeof(PetscInt));CHKERRQ(ierr);
  nt   = 0;
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (lindices[inA->rmap->mapping->indices[i]]) {
      nt++;
      r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]];
    }
  }
  if (nt > no) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,no);
  ierr = PetscFree(lindices);CHKERRQ(ierr);
  ierr = PetscMalloc((nt+1)*sizeof(PetscInt),&auglyrmapo);CHKERRQ(ierr);
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (r_rmapo[i]) {
      auglyrmapo[(r_rmapo[i]-1)] = i;
    }
  }
  ierr = PetscFree(r_rmapo);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF,nt,&auglyoo);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
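/*
   Summary of the maps built above (illustrative): auglyrmapd[k] gives the
   position in the local (ISLocalToGlobalMapping) ordering of the k-th locally
   owned ("diagonal") index, and auglyrmapo[k] gives the position in the local
   ordering of the k-th off-diagonal column of ina->B.  They let a scale
   vector supplied in the local ordering be permuted into the column orderings
   of ina->A and ina->B, respectively, as done in
   MatDiagonalScaleLocal_MPIAIJ() below.
*/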
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJDiagonalScaleLocal"
PetscErrorCode MatMPIAIJDiagonalScaleLocal(Mat A,Vec scale)
{
  /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(A,"MatDiagonalScaleLocal_C",(Mat,Vec),(A,scale));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScaleLocal_MPIAIJ"
PetscErrorCode MatDiagonalScaleLocal_MPIAIJ(Mat A,Vec scale)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data; /* access the private part of the matrix */
  PetscErrorCode ierr;
  PetscInt       n,i;
  PetscScalar    *d,*o,*s;

  PetscFunctionBegin;
  if (!auglyrmapd) {
    ierr = MatMPIAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr);
  }

  ierr = VecGetArray(scale,&s);CHKERRQ(ierr);

  ierr = VecGetLocalSize(auglydd,&n);CHKERRQ(ierr);
  ierr = VecGetArray(auglydd,&d);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    d[i] = s[auglyrmapd[i]]; /* copy the "diagonal" (true local) portion of scale into the dd vector */
  }
  ierr = VecRestoreArray(auglydd,&d);CHKERRQ(ierr);
  /* column scale the "diagonal" portion of the local matrix */
  ierr = MatDiagonalScale(a->A,NULL,auglydd);CHKERRQ(ierr);

  ierr = VecGetLocalSize(auglyoo,&n);CHKERRQ(ierr);
  ierr = VecGetArray(auglyoo,&o);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    o[i] = s[auglyrmapo[i]]; /* copy the "off-diagonal" portion of scale into the oo vector */
  }
  ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr);
  ierr = VecRestoreArray(auglyoo,&o);CHKERRQ(ierr);
  /* column scale the "off-diagonal" portion of the local matrix */
  ierr = MatDiagonalScale(a->B,NULL,auglyoo);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
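/*
   A minimal usage sketch (an assumed calling sequence, not taken from this
   file): the setup above reads inA->rmap->mapping, so the matrix must have a
   local-to-global mapping attached, e.g. via MatSetLocalToGlobalMapping(),
   before the first call.

     Vec scale;   hypothetical sequential vector with one entry per local index
     ierr = MatMPIAIJDiagonalScaleLocal(A,scale);CHKERRQ(ierr);

   This column-scales both the "diagonal" (a->A) and "off-diagonal" (a->B)
   pieces of the local matrix by the corresponding entries of scale.
*/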