/*
  Support for the parallel AIJ matrix-vector multiply
*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>

#undef __FUNCT__
#define __FUNCT__ "MatSetUpMultiply_MPIAIJ"
PetscErrorCode MatSetUpMultiply_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ         *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ         *B   = (Mat_SeqAIJ*)(aij->B->data);
  PetscErrorCode     ierr;
  PetscInt           i,j,*aj = B->j,ec = 0,*garray;
  IS                 from,to;
  Vec                gvec;
  PetscBool          useblockis;
#if defined (PETSC_USE_CTABLE)
  PetscTable         gid1_lid1;
  PetscTablePosition tpos;
  PetscInt           gid,lid;
#else
  PetscInt           N = mat->cmap->N,*indices;
#endif

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  /* use a table */
  ierr = PetscTableCreate(aij->B->rmap->n,mat->cmap->N+1,&gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<aij->B->rmap->n; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt data,gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
      if (!data) {
        /* one based table */
        ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr);
      }
    }
  }
  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
  while (tpos) {
    ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
    gid--;
    lid--;
    garray[lid] = gid;
  }
  ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); /* sort, and rebuild */
  ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
  /* compact out the extra columns in B */
  for (i=0; i<aij->B->rmap->n; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
      lid--;
      aj[B->i[i] + j] = lid;
    }
  }
  aij->B->cmap->n = aij->B->cmap->N = ec;
  ierr = PetscLayoutSetUp((aij->B->cmap));CHKERRQ(ierr);
  ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr);
#else
  /* Make an array as long as the number of columns */
  /* mark those columns that are in aij->B */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<aij->B->rmap->n; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ec   = 0;
  for (i=0; i<N; i++) {
    if (indices[i]) garray[ec++] = i;
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) {
    indices[garray[i]] = i;
  }

  /* compact out the extra columns in B */
  for (i=0; i<aij->B->rmap->n; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
    }
  }
  aij->B->cmap->n = aij->B->cmap->N = ec;
  ierr = PetscLayoutSetUp((aij->B->cmap));CHKERRQ(ierr);
  ierr = PetscFree(indices);CHKERRQ(ierr);
#endif
  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec,&aij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building the scatter-gather */
  /* check for the special case where blocks are communicated for faster VecScatterXXX */
  useblockis = PETSC_FALSE;
  if (mat->cmap->bs > 1) {
    PetscInt bs = mat->cmap->bs,ibs,ga;
    if (!(ec % bs)) {
      useblockis = PETSC_TRUE;
      for (i=0; i<ec/bs; i++) {
        if ((ga = garray[ibs = i*bs]) % bs) {
          useblockis = PETSC_FALSE;
          break;
        }
        for (j=1; j<bs; j++) {
          if (garray[ibs+j] != ga+j) {
            useblockis = PETSC_FALSE;
            break;
          }
        }
        if (!useblockis) break;
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  i    = (PetscInt)useblockis;
  ierr = MPI_Allreduce(&i,&j,1,MPIU_INT,MPI_MIN,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (j!=i) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Use of blocked not consistent (I am using blocked)");
#endif

  if (useblockis) {
    PetscInt *ga,bs = mat->cmap->bs,iec = ec/bs;
    if (ec%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"ec=%D bs=%D",ec,bs);
    ierr = PetscInfo(mat,"Using block index set to define scatter\n");CHKERRQ(ierr);
    ierr = PetscMalloc(iec*sizeof(PetscInt),&ga);CHKERRQ(ierr);
    for (i=0; i<iec; i++) ga[i] = garray[i*bs]/bs;
    ierr = ISCreateBlock(((PetscObject)mat)->comm,bs,iec,ga,PETSC_OWN_POINTER,&from);CHKERRQ(ierr);
  } else {
    ierr = ISCreateGeneral(((PetscObject)mat)->comm,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
  }

  ierr = ISCreateStride(PETSC_COMM_SELF,ec,0,1,&to);CHKERRQ(ierr);

  /* create temporary global vector to generate scatter context */
  /* This does not allocate the array's memory so is efficient */
  ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,1,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr);

  /* generate the scatter context */
  ierr = VecScatterCreate(gvec,from,aij->lvec,to,&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,aij->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
  aij->garray = garray;
  ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = ISDestroy(&from);CHKERRQ(ierr);
  ierr = ISDestroy(&to);CHKERRQ(ierr);
  ierr = VecDestroy(&gvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
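
/*
   For orientation, a hedged sketch (not part of the library; the actual code
   lives in mpiaij.c and the function name here is only illustrative) of how
   the lvec and Mvctx built above are consumed by the parallel matrix-vector
   product. The scatter gathers the needed off-process entries of the input
   vector into lvec while the "diagonal" block is applied, and the compacted
   column indices of the "off-diagonal" block then address lvec directly:

      PetscErrorCode MatMult_MPIAIJ_sketch(Mat A,Vec xx,Vec yy)
      {
        Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
        PetscErrorCode ierr;

        PetscFunctionBegin;
        ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
        ierr = MatMult(a->A,xx,yy);CHKERRQ(ierr);
        ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
        ierr = MatMultAdd(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
        PetscFunctionReturn(0);
      }

   The compaction of B's column indices performed above is what allows lvec to
   be a dense sequential vector of length ec rather than of global length.
*/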


#undef __FUNCT__
#define __FUNCT__ "MatDisAssemble_MPIAIJ"
/*
   Takes the local part of an already assembled MPIAIJ matrix
   and disassembles it. This is to allow new nonzeros into the matrix
   that require more communication in the matrix-vector multiply.
   Thus certain data-structures must be rebuilt.

   Kind of slow! But that's what application programmers get when
   they are sloppy.
*/
PetscErrorCode MatDisAssemble_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)A->data;
  Mat            B     = aij->B,Bnew;
  Mat_SeqAIJ     *Baij = (Mat_SeqAIJ*)B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,m = B->rmap->n,n = A->cmap->N,col,ct = 0,*garray = aij->garray,*nz,ec;
  PetscScalar    v;

  PetscFunctionBegin;
  /* free stuff related to matrix-vector multiply */
  ierr = VecGetSize(aij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  if (aij->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
    ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(A,-aij->B->cmap->n*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  }

  /* make sure that B is assembled so we can access its values */
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* invent new B and copy stuff over */
  ierr = PetscMalloc((m+1)*sizeof(PetscInt),&nz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    nz[i] = Baij->i[i+1] - Baij->i[i];
  }
  ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr);
  ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr);
  ierr = MatSetBlockSizes(Bnew,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
  ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(Bnew,0,nz);CHKERRQ(ierr);
  ((Mat_SeqAIJ*)Bnew->data)->nonew = Baij->nonew; /* Inherit insertion error options. */
  ierr = PetscFree(nz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    for (j=Baij->i[i]; j<Baij->i[i+1]; j++) {
      col  = garray[Baij->j[ct]];
      v    = Baij->a[ct++];
      ierr = MatSetValues(Bnew,1,&i,1,&col,&v,B->insertmode);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr);
  aij->B = Bnew;
  A->was_assembled = PETSC_FALSE;
  PetscFunctionReturn(0);
}
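
/*
   A hedged note on when disassembly is triggered (the actual call site is in
   the MPIAIJ MatSetValues()/assembly code, not in this file): once a matrix
   has been assembled, inserting a value into an off-process column that the
   compacted B block has never seen forces the expansion above, for example
   (the variable names here are purely illustrative):

      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatSetValues(A,1,&row,1,&newoffproccol,&val,INSERT_VALUES);CHKERRQ(ierr);

   B is then rebuilt with global column indices, and since was_assembled is
   reset above, the next assembly runs MatSetUpMultiply_MPIAIJ() again to
   recompact B and recreate lvec/Mvctx with the enlarged communication pattern.
*/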

/* ugly stuff added for Glenn; someday we should fix this up */

static PetscInt *auglyrmapd = 0,*auglyrmapo = 0; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */
static Vec      auglydd = 0,auglyoo = 0;         /* work vectors used to scale the two parts of the local matrix */


#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJDiagonalScaleLocalSetUp"
PetscErrorCode MatMPIAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale)
{
  Mat_MPIAIJ     *ina = (Mat_MPIAIJ*) inA->data; /* access private part of matrix */
  PetscErrorCode ierr;
  PetscInt       i,n,nt,cstart,cend,no,*garray = ina->garray,*lindices;
  PetscInt       *r_rmapd,*r_rmapo;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr);
  ierr = MatGetSize(ina->A,PETSC_NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc((inA->rmap->mapping->n+1)*sizeof(PetscInt),&r_rmapd);CHKERRQ(ierr);
  ierr = PetscMemzero(r_rmapd,inA->rmap->mapping->n*sizeof(PetscInt));CHKERRQ(ierr);
  nt   = 0;
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (inA->rmap->mapping->indices[i] >= cstart && inA->rmap->mapping->indices[i] < cend) {
      nt++;
      r_rmapd[i] = inA->rmap->mapping->indices[i] + 1;
    }
  }
  if (nt != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D n %D",nt,n);
  ierr = PetscMalloc((n+1)*sizeof(PetscInt),&auglyrmapd);CHKERRQ(ierr);
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (r_rmapd[i]) {
      auglyrmapd[(r_rmapd[i]-1)-cstart] = i;
    }
  }
  ierr = PetscFree(r_rmapd);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF,n,&auglydd);CHKERRQ(ierr);

  ierr = PetscMalloc((inA->cmap->N+1)*sizeof(PetscInt),&lindices);CHKERRQ(ierr);
  ierr = PetscMemzero(lindices,inA->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<ina->B->cmap->n; i++) {
    lindices[garray[i]] = i+1;
  }
  no   = inA->rmap->mapping->n - nt;
  ierr = PetscMalloc((inA->rmap->mapping->n+1)*sizeof(PetscInt),&r_rmapo);CHKERRQ(ierr);
  ierr = PetscMemzero(r_rmapo,inA->rmap->mapping->n*sizeof(PetscInt));CHKERRQ(ierr);
  nt   = 0;
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (lindices[inA->rmap->mapping->indices[i]]) {
      nt++;
      r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]];
    }
  }
  if (nt > no) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,no);
  ierr = PetscFree(lindices);CHKERRQ(ierr);
  ierr = PetscMalloc((nt+1)*sizeof(PetscInt),&auglyrmapo);CHKERRQ(ierr);
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (r_rmapo[i]) {
      auglyrmapo[(r_rmapo[i]-1)] = i;
    }
  }
  ierr = PetscFree(r_rmapo);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF,nt,&auglyoo);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
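
/*
   A rough summary of what the setup above leaves behind (see the routine at
   the end of this file for how it is used): auglyrmapd[k] holds the position,
   in the caller's local (local-to-global mapping) ordering, of the k-th column
   of the "diagonal" block, and auglyrmapo[k] the corresponding position for
   the k-th compacted column of the "off-diagonal" block; auglydd and auglyoo
   are work vectors of matching lengths that receive those pieces of the user's
   scale vector.
*/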

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJDiagonalScaleLocal"
PetscErrorCode MatMPIAIJDiagonalScaleLocal(Mat A,Vec scale)
{
  /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(A,"MatDiagonalScaleLocal_C",(Mat,Vec),(A,scale));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScaleLocal_MPIAIJ"
PetscErrorCode MatDiagonalScaleLocal_MPIAIJ(Mat A,Vec scale)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data; /* access private part of matrix */
  PetscErrorCode ierr;
  PetscInt       n,i;
  PetscScalar    *d,*o,*s;

  PetscFunctionBegin;
  if (!auglyrmapd) {
    ierr = MatMPIAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr);
  }

  ierr = VecGetArray(scale,&s);CHKERRQ(ierr);

  ierr = VecGetLocalSize(auglydd,&n);CHKERRQ(ierr);
  ierr = VecGetArray(auglydd,&d);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    d[i] = s[auglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */
  }
  ierr = VecRestoreArray(auglydd,&d);CHKERRQ(ierr);
  /* column scale "diagonal" portion of local matrix */
  ierr = MatDiagonalScale(a->A,PETSC_NULL,auglydd);CHKERRQ(ierr);

  ierr = VecGetLocalSize(auglyoo,&n);CHKERRQ(ierr);
  ierr = VecGetArray(auglyoo,&o);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    o[i] = s[auglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
  }
  ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr);
  ierr = VecRestoreArray(auglyoo,&o);CHKERRQ(ierr);
  /* column scale "off-diagonal" portion of local matrix */
  ierr = MatDiagonalScale(a->B,PETSC_NULL,auglyoo);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
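
/*
   A hedged usage sketch (the object names are only illustrative): the routine
   expects a matrix on which MatSetLocalToGlobalMapping() has been called, and
   a sequential vector with one entry per index of that local mapping (owned
   and ghost points); both local blocks are then column-scaled in place:

      ierr = MatSetLocalToGlobalMapping(A,map,map);CHKERRQ(ierr);
      ...  assemble A  ...
      ierr = VecCreateSeq(PETSC_COMM_SELF,nlocal,&lscale);CHKERRQ(ierr);
      ...  fill lscale in the local ordering  ...
      ierr = MatMPIAIJDiagonalScaleLocal(A,lscale);CHKERRQ(ierr);
*/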