1 2 /* 3 Support for the parallel AIJ matrix vector multiply 4 */ 5 #include <../src/mat/impls/aij/mpi/mpiaij.h> 6 #include <petsc-private/isimpl.h> /* needed because accesses data structure of ISLocalToGlobalMapping directly */ 7 8 #undef __FUNCT__ 9 #define __FUNCT__ "MatSetUpMultiply_MPIAIJ" 10 PetscErrorCode MatSetUpMultiply_MPIAIJ(Mat mat) 11 { 12 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 13 Mat_SeqAIJ *B = (Mat_SeqAIJ*)(aij->B->data); 14 PetscErrorCode ierr; 15 PetscInt i,j,*aj = B->j,ec = 0,*garray; 16 IS from,to; 17 Vec gvec; 18 PetscBool useblockis; 19 #if defined(PETSC_USE_CTABLE) 20 PetscTable gid1_lid1; 21 PetscTablePosition tpos; 22 PetscInt gid,lid; 23 #else 24 PetscInt N = mat->cmap->N,*indices; 25 #endif 26 27 PetscFunctionBegin; 28 #if defined(PETSC_USE_CTABLE) 29 /* use a table */ 30 ierr = PetscTableCreate(aij->B->rmap->n,mat->cmap->N+1,&gid1_lid1);CHKERRQ(ierr); 31 for (i=0; i<aij->B->rmap->n; i++) { 32 for (j=0; j<B->ilen[i]; j++) { 33 PetscInt data,gid1 = aj[B->i[i] + j] + 1; 34 ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr); 35 if (!data) { 36 /* one based table */ 37 ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr); 38 } 39 } 40 } 41 /* form array of columns we need */ 42 ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 43 ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 44 while (tpos) { 45 ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 46 gid--; 47 lid--; 48 garray[lid] = gid; 49 } 50 ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); /* sort, and rebuild */ 51 ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 52 for (i=0; i<ec; i++) { 53 ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 54 } 55 /* compact out the extra columns in B */ 56 for (i=0; i<aij->B->rmap->n; i++) { 57 for (j=0; j<B->ilen[i]; j++) { 58 PetscInt gid1 = aj[B->i[i] + j] + 1; 59 ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 60 lid--; 61 aj[B->i[i] + j] = lid; 62 } 63 } 64 aij->B->cmap->n = aij->B->cmap->N = ec; 65 66 ierr = PetscLayoutSetUp((aij->B->cmap));CHKERRQ(ierr); 67 ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr); 68 #else 69 /* Make an array as long as the number of columns */ 70 /* mark those columns that are in aij->B */ 71 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr); 72 ierr = PetscMemzero(indices,N*sizeof(PetscInt));CHKERRQ(ierr); 73 for (i=0; i<aij->B->rmap->n; i++) { 74 for (j=0; j<B->ilen[i]; j++) { 75 if (!indices[aj[B->i[i] + j]]) ec++; 76 indices[aj[B->i[i] + j]] = 1; 77 } 78 } 79 80 /* form array of columns we need */ 81 ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 82 ec = 0; 83 for (i=0; i<N; i++) { 84 if (indices[i]) garray[ec++] = i; 85 } 86 87 /* make indices now point into garray */ 88 for (i=0; i<ec; i++) { 89 indices[garray[i]] = i; 90 } 91 92 /* compact out the extra columns in B */ 93 for (i=0; i<aij->B->rmap->n; i++) { 94 for (j=0; j<B->ilen[i]; j++) { 95 aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 96 } 97 } 98 aij->B->cmap->n = aij->B->cmap->N = ec; 99 100 ierr = PetscLayoutSetUp((aij->B->cmap));CHKERRQ(ierr); 101 ierr = PetscFree(indices);CHKERRQ(ierr); 102 #endif 103 /* create local vector that is used to scatter into */ 104 ierr = VecCreateSeq(PETSC_COMM_SELF,ec,&aij->lvec);CHKERRQ(ierr); 105 106 /* create two temporary Index sets for build scatter gather */ 107 /* check for the special case where blocks are communicated for faster VecScatterXXX */ 108 useblockis = PETSC_FALSE; 109 #if 0 110 if (mat->cmap->bs > 1) { 111 PetscInt bs = mat->cmap->bs,ibs,ga; 112 if (!(ec % bs)) { 113 useblockis = PETSC_TRUE; 114 for (i=0; i<ec/bs; i++) { 115 if ((ga = garray[ibs = i*bs]) % bs) { 116 useblockis = PETSC_FALSE; 117 break; 118 } 119 for (j=1; j<bs; j++) { 120 if (garray[ibs+j] != ga+j) { 121 useblockis = PETSC_FALSE; 122 break; 123 } 124 } 125 if (!useblockis) break; 126 } 127 } 128 } 129 #if defined(PETSC_USE_DEBUG) 130 i = (PetscInt)useblockis; 131 ierr = MPI_Allreduce(&i,&j,1,MPIU_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 132 if (j!=i) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Use of blocked not consistant (I am usning blocked)"); 133 #endif 134 #endif 135 if (useblockis) { 136 PetscInt *ga,bs = mat->cmap->bs,iec = ec/bs; 137 if (ec%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"ec=%D bs=%D",ec,bs); 138 ierr = PetscInfo(mat,"Using block index set to define scatter\n");CHKERRQ(ierr); 139 ierr = PetscMalloc(iec*sizeof(PetscInt),&ga);CHKERRQ(ierr); 140 for (i=0; i<iec; i++) ga[i] = garray[i*bs]/bs; 141 ierr = ISCreateBlock(PetscObjectComm((PetscObject)mat),bs,iec,ga,PETSC_OWN_POINTER,&from);CHKERRQ(ierr); 142 } else { 143 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 144 } 145 146 ierr = ISCreateStride(PETSC_COMM_SELF,ec,0,1,&to);CHKERRQ(ierr); 147 148 /* create temporary global vector to generate scatter context */ 149 /* This does not allocate the array's memory so is efficient */ 150 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);CHKERRQ(ierr); 151 152 /* generate the scatter context */ 153 ierr = VecScatterCreate(gvec,from,aij->lvec,to,&aij->Mvctx);CHKERRQ(ierr); 154 ierr = PetscLogObjectParent(mat,aij->Mvctx);CHKERRQ(ierr); 155 ierr = PetscLogObjectParent(mat,aij->lvec);CHKERRQ(ierr); 156 ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr); 157 ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr); 158 159 aij->garray = garray; 160 161 ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr); 162 ierr = ISDestroy(&from);CHKERRQ(ierr); 163 ierr = ISDestroy(&to);CHKERRQ(ierr); 164 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 165 PetscFunctionReturn(0); 166 } 167 168 169 #undef __FUNCT__ 170 #define __FUNCT__ "MatDisAssemble_MPIAIJ" 171 /* 172 Takes the local part of an already assembled MPIAIJ matrix 173 and disassembles it. This is to allow new nonzeros into the matrix 174 that require more communication in the matrix vector multiply. 175 Thus certain data-structures must be rebuilt. 176 177 Kind of slow! But that's what application programmers get when 178 they are sloppy. 179 */ 180 PetscErrorCode MatDisAssemble_MPIAIJ(Mat A) 181 { 182 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 183 Mat B = aij->B,Bnew; 184 Mat_SeqAIJ *Baij = (Mat_SeqAIJ*)B->data; 185 PetscErrorCode ierr; 186 PetscInt i,j,m = B->rmap->n,n = A->cmap->N,col,ct = 0,*garray = aij->garray,*nz,ec; 187 PetscScalar v; 188 189 PetscFunctionBegin; 190 /* free stuff related to matrix-vec multiply */ 191 ierr = VecGetSize(aij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 192 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 193 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 194 if (aij->colmap) { 195 #if defined(PETSC_USE_CTABLE) 196 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 197 #else 198 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 199 ierr = PetscLogObjectMemory(A,-aij->B->cmap->n*sizeof(PetscInt));CHKERRQ(ierr); 200 #endif 201 } 202 203 /* make sure that B is assembled so we can access its values */ 204 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 205 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 206 207 /* invent new B and copy stuff over */ 208 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&nz);CHKERRQ(ierr); 209 for (i=0; i<m; i++) { 210 nz[i] = Baij->i[i+1] - Baij->i[i]; 211 } 212 ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr); 213 ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr); 214 ierr = MatSetBlockSizes(Bnew,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 215 ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr); 216 ierr = MatSeqAIJSetPreallocation(Bnew,0,nz);CHKERRQ(ierr); 217 218 ((Mat_SeqAIJ*)Bnew->data)->nonew = Baij->nonew; /* Inherit insertion error options. */ 219 220 ierr = PetscFree(nz);CHKERRQ(ierr); 221 for (i=0; i<m; i++) { 222 for (j=Baij->i[i]; j<Baij->i[i+1]; j++) { 223 col = garray[Baij->j[ct]]; 224 v = Baij->a[ct++]; 225 ierr = MatSetValues(Bnew,1,&i,1,&col,&v,B->insertmode);CHKERRQ(ierr); 226 } 227 } 228 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 229 ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr); 230 ierr = MatDestroy(&B);CHKERRQ(ierr); 231 ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr); 232 233 aij->B = Bnew; 234 A->was_assembled = PETSC_FALSE; 235 PetscFunctionReturn(0); 236 } 237 238 /* ugly stuff added for Glenn someday we should fix this up */ 239 240 static PetscInt *auglyrmapd = 0,*auglyrmapo = 0; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */ 241 static Vec auglydd = 0,auglyoo = 0; /* work vectors used to scale the two parts of the local matrix */ 242 243 244 #undef __FUNCT__ 245 #define __FUNCT__ "MatMPIAIJDiagonalScaleLocalSetUp" 246 PetscErrorCode MatMPIAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale) 247 { 248 Mat_MPIAIJ *ina = (Mat_MPIAIJ*) inA->data; /*access private part of matrix */ 249 PetscErrorCode ierr; 250 PetscInt i,n,nt,cstart,cend,no,*garray = ina->garray,*lindices; 251 PetscInt *r_rmapd,*r_rmapo; 252 253 PetscFunctionBegin; 254 ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr); 255 ierr = MatGetSize(ina->A,NULL,&n);CHKERRQ(ierr); 256 ierr = PetscMalloc((inA->rmap->mapping->n+1)*sizeof(PetscInt),&r_rmapd);CHKERRQ(ierr); 257 ierr = PetscMemzero(r_rmapd,inA->rmap->mapping->n*sizeof(PetscInt));CHKERRQ(ierr); 258 nt = 0; 259 for (i=0; i<inA->rmap->mapping->n; i++) { 260 if (inA->rmap->mapping->indices[i] >= cstart && inA->rmap->mapping->indices[i] < cend) { 261 nt++; 262 r_rmapd[i] = inA->rmap->mapping->indices[i] + 1; 263 } 264 } 265 if (nt != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D n %D",nt,n); 266 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&auglyrmapd);CHKERRQ(ierr); 267 for (i=0; i<inA->rmap->mapping->n; i++) { 268 if (r_rmapd[i]) { 269 auglyrmapd[(r_rmapd[i]-1)-cstart] = i; 270 } 271 } 272 ierr = PetscFree(r_rmapd);CHKERRQ(ierr); 273 ierr = VecCreateSeq(PETSC_COMM_SELF,n,&auglydd);CHKERRQ(ierr); 274 275 ierr = PetscMalloc((inA->cmap->N+1)*sizeof(PetscInt),&lindices);CHKERRQ(ierr); 276 ierr = PetscMemzero(lindices,inA->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 277 for (i=0; i<ina->B->cmap->n; i++) { 278 lindices[garray[i]] = i+1; 279 } 280 no = inA->rmap->mapping->n - nt; 281 ierr = PetscMalloc((inA->rmap->mapping->n+1)*sizeof(PetscInt),&r_rmapo);CHKERRQ(ierr); 282 ierr = PetscMemzero(r_rmapo,inA->rmap->mapping->n*sizeof(PetscInt));CHKERRQ(ierr); 283 nt = 0; 284 for (i=0; i<inA->rmap->mapping->n; i++) { 285 if (lindices[inA->rmap->mapping->indices[i]]) { 286 nt++; 287 r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]]; 288 } 289 } 290 if (nt > no) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,n); 291 ierr = PetscFree(lindices);CHKERRQ(ierr); 292 ierr = PetscMalloc((nt+1)*sizeof(PetscInt),&auglyrmapo);CHKERRQ(ierr); 293 for (i=0; i<inA->rmap->mapping->n; i++) { 294 if (r_rmapo[i]) { 295 auglyrmapo[(r_rmapo[i]-1)] = i; 296 } 297 } 298 ierr = PetscFree(r_rmapo);CHKERRQ(ierr); 299 ierr = VecCreateSeq(PETSC_COMM_SELF,nt,&auglyoo);CHKERRQ(ierr); 300 PetscFunctionReturn(0); 301 } 302 303 #undef __FUNCT__ 304 #define __FUNCT__ "MatMPIAIJDiagonalScaleLocal" 305 PetscErrorCode MatMPIAIJDiagonalScaleLocal(Mat A,Vec scale) 306 { 307 /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */ 308 PetscErrorCode ierr; 309 310 PetscFunctionBegin; 311 ierr = PetscTryMethod(A,"MatDiagonalScaleLocal_C",(Mat,Vec),(A,scale));CHKERRQ(ierr); 312 PetscFunctionReturn(0); 313 } 314 315 EXTERN_C_BEGIN 316 #undef __FUNCT__ 317 #define __FUNCT__ "MatDiagonalScaleLocal_MPIAIJ" 318 PetscErrorCode MatDiagonalScaleLocal_MPIAIJ(Mat A,Vec scale) 319 { 320 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; /*access private part of matrix */ 321 PetscErrorCode ierr; 322 PetscInt n,i; 323 PetscScalar *d,*o,*s; 324 325 PetscFunctionBegin; 326 if (!auglyrmapd) { 327 ierr = MatMPIAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr); 328 } 329 330 ierr = VecGetArray(scale,&s);CHKERRQ(ierr); 331 332 ierr = VecGetLocalSize(auglydd,&n);CHKERRQ(ierr); 333 ierr = VecGetArray(auglydd,&d);CHKERRQ(ierr); 334 for (i=0; i<n; i++) { 335 d[i] = s[auglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ 336 } 337 ierr = VecRestoreArray(auglydd,&d);CHKERRQ(ierr); 338 /* column scale "diagonal" portion of local matrix */ 339 ierr = MatDiagonalScale(a->A,NULL,auglydd);CHKERRQ(ierr); 340 341 ierr = VecGetLocalSize(auglyoo,&n);CHKERRQ(ierr); 342 ierr = VecGetArray(auglyoo,&o);CHKERRQ(ierr); 343 for (i=0; i<n; i++) { 344 o[i] = s[auglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ 345 } 346 ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr); 347 ierr = VecRestoreArray(auglyoo,&o);CHKERRQ(ierr); 348 /* column scale "off-diagonal" portion of local matrix */ 349 ierr = MatDiagonalScale(a->B,NULL,auglyoo);CHKERRQ(ierr); 350 PetscFunctionReturn(0); 351 } 352 EXTERN_C_END 353 354 355