1 2 /* 3 Support for the parallel AIJ matrix vector multiply 4 */ 5 #include <../src/mat/impls/aij/mpi/mpiaij.h> 6 7 #undef __FUNCT__ 8 #define __FUNCT__ "MatSetUpMultiply_MPIAIJ" 9 PetscErrorCode MatSetUpMultiply_MPIAIJ(Mat mat) 10 { 11 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 12 Mat_SeqAIJ *B = (Mat_SeqAIJ*)(aij->B->data); 13 PetscErrorCode ierr; 14 PetscInt i,j,*aj = B->j,ec = 0,*garray; 15 IS from,to; 16 Vec gvec; 17 PetscBool useblockis; 18 #if defined (PETSC_USE_CTABLE) 19 PetscTable gid1_lid1; 20 PetscTablePosition tpos; 21 PetscInt gid,lid; 22 #else 23 PetscInt N = mat->cmap->N,*indices; 24 #endif 25 26 PetscFunctionBegin; 27 #if defined (PETSC_USE_CTABLE) 28 /* use a table */ 29 ierr = PetscTableCreate(aij->B->rmap->n,mat->cmap->N+1,&gid1_lid1);CHKERRQ(ierr); 30 for (i=0; i<aij->B->rmap->n; i++) { 31 for (j=0; j<B->ilen[i]; j++) { 32 PetscInt data,gid1 = aj[B->i[i] + j] + 1; 33 ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr); 34 if (!data) { 35 /* one based table */ 36 ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr); 37 } 38 } 39 } 40 /* form array of columns we need */ 41 ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 42 ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 43 while (tpos) { 44 ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 45 gid--; 46 lid--; 47 garray[lid] = gid; 48 } 49 ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); /* sort, and rebuild */ 50 ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 51 for (i=0; i<ec; i++) { 52 ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 53 } 54 /* compact out the extra columns in B */ 55 for (i=0; i<aij->B->rmap->n; i++) { 56 for (j=0; j<B->ilen[i]; j++) { 57 PetscInt gid1 = aj[B->i[i] + j] + 1; 58 ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 59 lid --; 60 aj[B->i[i] + j] = lid; 61 } 62 } 63 aij->B->cmap->n = aij->B->cmap->N = ec; 64 ierr = PetscLayoutSetUp((aij->B->cmap));CHKERRQ(ierr); 65 ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr); 66 #else 67 /* Make an array as long as the number of columns */ 68 /* mark those columns that are in aij->B */ 69 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr); 70 ierr = PetscMemzero(indices,N*sizeof(PetscInt));CHKERRQ(ierr); 71 for (i=0; i<aij->B->rmap->n; i++) { 72 for (j=0; j<B->ilen[i]; j++) { 73 if (!indices[aj[B->i[i] + j] ]) ec++; 74 indices[aj[B->i[i] + j] ] = 1; 75 } 76 } 77 78 /* form array of columns we need */ 79 ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 80 ec = 0; 81 for (i=0; i<N; i++) { 82 if (indices[i]) garray[ec++] = i; 83 } 84 85 /* make indices now point into garray */ 86 for (i=0; i<ec; i++) { 87 indices[garray[i]] = i; 88 } 89 90 /* compact out the extra columns in B */ 91 for (i=0; i<aij->B->rmap->n; i++) { 92 for (j=0; j<B->ilen[i]; j++) { 93 aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 94 } 95 } 96 aij->B->cmap->n = aij->B->cmap->N = ec; 97 ierr = PetscLayoutSetUp((aij->B->cmap));CHKERRQ(ierr); 98 ierr = PetscFree(indices);CHKERRQ(ierr); 99 #endif 100 /* create local vector that is used to scatter into */ 101 ierr = VecCreateSeq(PETSC_COMM_SELF,ec,&aij->lvec);CHKERRQ(ierr); 102 103 /* create two temporary Index sets for build scatter gather */ 104 /* check for the special case where blocks are communicated for faster VecScatterXXX */ 105 useblockis = PETSC_FALSE; 106 if (mat->cmap->bs > 1) { 107 PetscInt bs = mat->cmap->bs,ibs,ga; 108 if (!(ec % bs)) { 109 useblockis = PETSC_TRUE; 110 for (i=0; i<ec/bs; i++) { 111 if ((ga = garray[ibs = i*bs]) % bs) { 112 useblockis = PETSC_FALSE; 113 break; 114 } 115 for (j=1; j<bs; j++) { 116 if (garray[ibs+j] != ga+j) { 117 useblockis = PETSC_FALSE; 118 break; 119 } 120 } 121 if (!useblockis) break; 122 } 123 } 124 } 125 #if defined(PETSC_USE_DEBUG) 126 i = (PetscInt)useblockis; 127 ierr = MPI_Allreduce(&i,&j,1,MPIU_INT,MPI_MIN,((PetscObject)mat)->comm);CHKERRQ(ierr); 128 if (j!=i) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Use of blocked not consistant (I am usning blocked)"); 129 #endif 130 131 if (useblockis) { 132 PetscInt *ga,bs = mat->cmap->bs,iec = ec/bs; 133 if (ec%bs)SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"ec=%D bs=%D",ec,bs); 134 ierr = PetscInfo(mat,"Using block index set to define scatter\n");CHKERRQ(ierr); 135 ierr = PetscMalloc(iec*sizeof(PetscInt),&ga);CHKERRQ(ierr); 136 for (i=0; i<iec; i++) ga[i] = garray[i*bs]/bs; 137 ierr = ISCreateBlock(((PetscObject)mat)->comm,bs,iec,ga,PETSC_OWN_POINTER,&from);CHKERRQ(ierr); 138 } else { 139 ierr = ISCreateGeneral(((PetscObject)mat)->comm,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 140 } 141 142 ierr = ISCreateStride(PETSC_COMM_SELF,ec,0,1,&to);CHKERRQ(ierr); 143 144 /* create temporary global vector to generate scatter context */ 145 /* This does not allocate the array's memory so is efficient */ 146 ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,1,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr); 147 148 /* generate the scatter context */ 149 ierr = VecScatterCreate(gvec,from,aij->lvec,to,&aij->Mvctx);CHKERRQ(ierr); 150 ierr = PetscLogObjectParent(mat,aij->Mvctx);CHKERRQ(ierr); 151 ierr = PetscLogObjectParent(mat,aij->lvec);CHKERRQ(ierr); 152 ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr); 153 ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr); 154 aij->garray = garray; 155 ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr); 156 ierr = ISDestroy(&from);CHKERRQ(ierr); 157 ierr = ISDestroy(&to);CHKERRQ(ierr); 158 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 159 PetscFunctionReturn(0); 160 } 161 162 163 #undef __FUNCT__ 164 #define __FUNCT__ "MatDisAssemble_MPIAIJ" 165 /* 166 Takes the local part of an already assembled MPIAIJ matrix 167 and disassembles it. This is to allow new nonzeros into the matrix 168 that require more communication in the matrix vector multiply. 169 Thus certain data-structures must be rebuilt. 170 171 Kind of slow! But that's what application programmers get when 172 they are sloppy. 173 */ 174 PetscErrorCode MatDisAssemble_MPIAIJ(Mat A) 175 { 176 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 177 Mat B = aij->B,Bnew; 178 Mat_SeqAIJ *Baij = (Mat_SeqAIJ*)B->data; 179 PetscErrorCode ierr; 180 PetscInt i,j,m = B->rmap->n,n = A->cmap->N,col,ct = 0,*garray = aij->garray,*nz,ec; 181 PetscScalar v; 182 183 PetscFunctionBegin; 184 /* free stuff related to matrix-vec multiply */ 185 ierr = VecGetSize(aij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 186 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 187 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 188 if (aij->colmap) { 189 #if defined (PETSC_USE_CTABLE) 190 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 191 #else 192 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 193 ierr = PetscLogObjectMemory(A,-aij->B->cmap->n*sizeof(PetscInt));CHKERRQ(ierr); 194 #endif 195 } 196 197 /* make sure that B is assembled so we can access its values */ 198 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 199 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 200 201 /* invent new B and copy stuff over */ 202 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&nz);CHKERRQ(ierr); 203 for (i=0; i<m; i++) { 204 nz[i] = Baij->i[i+1] - Baij->i[i]; 205 } 206 ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr); 207 ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr); 208 ierr = MatSetBlockSizes(Bnew,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 209 ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr); 210 ierr = MatSeqAIJSetPreallocation(Bnew,0,nz);CHKERRQ(ierr); 211 ((Mat_SeqAIJ*)Bnew->data)->nonew = Baij->nonew; /* Inherit insertion error options. */ 212 ierr = PetscFree(nz);CHKERRQ(ierr); 213 for (i=0; i<m; i++) { 214 for (j=Baij->i[i]; j<Baij->i[i+1]; j++) { 215 col = garray[Baij->j[ct]]; 216 v = Baij->a[ct++]; 217 ierr = MatSetValues(Bnew,1,&i,1,&col,&v,B->insertmode);CHKERRQ(ierr); 218 } 219 } 220 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 221 ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr); 222 ierr = MatDestroy(&B);CHKERRQ(ierr); 223 ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr); 224 aij->B = Bnew; 225 A->was_assembled = PETSC_FALSE; 226 PetscFunctionReturn(0); 227 } 228 229 /* ugly stuff added for Glenn someday we should fix this up */ 230 231 static PetscInt *auglyrmapd = 0,*auglyrmapo = 0; /* mapping from the local ordering to the "diagonal" and "off-diagonal" 232 parts of the local matrix */ 233 static Vec auglydd = 0,auglyoo = 0; /* work vectors used to scale the two parts of the local matrix */ 234 235 236 #undef __FUNCT__ 237 #define __FUNCT__ "MatMPIAIJDiagonalScaleLocalSetUp" 238 PetscErrorCode MatMPIAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale) 239 { 240 Mat_MPIAIJ *ina = (Mat_MPIAIJ*) inA->data; /*access private part of matrix */ 241 PetscErrorCode ierr; 242 PetscInt i,n,nt,cstart,cend,no,*garray = ina->garray,*lindices; 243 PetscInt *r_rmapd,*r_rmapo; 244 245 PetscFunctionBegin; 246 ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr); 247 ierr = MatGetSize(ina->A,PETSC_NULL,&n);CHKERRQ(ierr); 248 ierr = PetscMalloc((inA->rmap->mapping->n+1)*sizeof(PetscInt),&r_rmapd);CHKERRQ(ierr); 249 ierr = PetscMemzero(r_rmapd,inA->rmap->mapping->n*sizeof(PetscInt));CHKERRQ(ierr); 250 nt = 0; 251 for (i=0; i<inA->rmap->mapping->n; i++) { 252 if (inA->rmap->mapping->indices[i] >= cstart && inA->rmap->mapping->indices[i] < cend) { 253 nt++; 254 r_rmapd[i] = inA->rmap->mapping->indices[i] + 1; 255 } 256 } 257 if (nt != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D n %D",nt,n); 258 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&auglyrmapd);CHKERRQ(ierr); 259 for (i=0; i<inA->rmap->mapping->n; i++) { 260 if (r_rmapd[i]) { 261 auglyrmapd[(r_rmapd[i]-1)-cstart] = i; 262 } 263 } 264 ierr = PetscFree(r_rmapd);CHKERRQ(ierr); 265 ierr = VecCreateSeq(PETSC_COMM_SELF,n,&auglydd);CHKERRQ(ierr); 266 267 ierr = PetscMalloc((inA->cmap->N+1)*sizeof(PetscInt),&lindices);CHKERRQ(ierr); 268 ierr = PetscMemzero(lindices,inA->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 269 for (i=0; i<ina->B->cmap->n; i++) { 270 lindices[garray[i]] = i+1; 271 } 272 no = inA->rmap->mapping->n - nt; 273 ierr = PetscMalloc((inA->rmap->mapping->n+1)*sizeof(PetscInt),&r_rmapo);CHKERRQ(ierr); 274 ierr = PetscMemzero(r_rmapo,inA->rmap->mapping->n*sizeof(PetscInt));CHKERRQ(ierr); 275 nt = 0; 276 for (i=0; i<inA->rmap->mapping->n; i++) { 277 if (lindices[inA->rmap->mapping->indices[i]]) { 278 nt++; 279 r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]]; 280 } 281 } 282 if (nt > no) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,n); 283 ierr = PetscFree(lindices);CHKERRQ(ierr); 284 ierr = PetscMalloc((nt+1)*sizeof(PetscInt),&auglyrmapo);CHKERRQ(ierr); 285 for (i=0; i<inA->rmap->mapping->n; i++) { 286 if (r_rmapo[i]) { 287 auglyrmapo[(r_rmapo[i]-1)] = i; 288 } 289 } 290 ierr = PetscFree(r_rmapo);CHKERRQ(ierr); 291 ierr = VecCreateSeq(PETSC_COMM_SELF,nt,&auglyoo);CHKERRQ(ierr); 292 293 PetscFunctionReturn(0); 294 } 295 296 #undef __FUNCT__ 297 #define __FUNCT__ "MatMPIAIJDiagonalScaleLocal" 298 PetscErrorCode MatMPIAIJDiagonalScaleLocal(Mat A,Vec scale) 299 { 300 /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */ 301 PetscErrorCode ierr; 302 303 PetscFunctionBegin; 304 ierr = PetscTryMethod(A,"MatDiagonalScaleLocal_C",(Mat,Vec),(A,scale));CHKERRQ(ierr); 305 PetscFunctionReturn(0); 306 } 307 308 EXTERN_C_BEGIN 309 #undef __FUNCT__ 310 #define __FUNCT__ "MatDiagonalScaleLocal_MPIAIJ" 311 PetscErrorCode MatDiagonalScaleLocal_MPIAIJ(Mat A,Vec scale) 312 { 313 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; /*access private part of matrix */ 314 PetscErrorCode ierr; 315 PetscInt n,i; 316 PetscScalar *d,*o,*s; 317 318 PetscFunctionBegin; 319 if (!auglyrmapd) { 320 ierr = MatMPIAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr); 321 } 322 323 ierr = VecGetArray(scale,&s);CHKERRQ(ierr); 324 325 ierr = VecGetLocalSize(auglydd,&n);CHKERRQ(ierr); 326 ierr = VecGetArray(auglydd,&d);CHKERRQ(ierr); 327 for (i=0; i<n; i++) { 328 d[i] = s[auglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ 329 } 330 ierr = VecRestoreArray(auglydd,&d);CHKERRQ(ierr); 331 /* column scale "diagonal" portion of local matrix */ 332 ierr = MatDiagonalScale(a->A,PETSC_NULL,auglydd);CHKERRQ(ierr); 333 334 ierr = VecGetLocalSize(auglyoo,&n);CHKERRQ(ierr); 335 ierr = VecGetArray(auglyoo,&o);CHKERRQ(ierr); 336 for (i=0; i<n; i++) { 337 o[i] = s[auglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ 338 } 339 ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr); 340 ierr = VecRestoreArray(auglyoo,&o);CHKERRQ(ierr); 341 /* column scale "off-diagonal" portion of local matrix */ 342 ierr = MatDiagonalScale(a->B,PETSC_NULL,auglyoo);CHKERRQ(ierr); 343 344 PetscFunctionReturn(0); 345 } 346 EXTERN_C_END 347 348 349