/*
   Support for the parallel AIJ matrix vector multiply
*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>

#undef __FUNCT__
#define __FUNCT__ "MatSetUpMultiply_MPIAIJ"
PetscErrorCode MatSetUpMultiply_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ         *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ         *B   = (Mat_SeqAIJ*)(aij->B->data);
  PetscErrorCode     ierr;
  PetscInt           i,j,*aj = B->j,ec = 0,*garray;
  IS                 from,to;
  Vec                gvec;
  PetscBool          useblockis;
#if defined (PETSC_USE_CTABLE)
  PetscTable         gid1_lid1;
  PetscTablePosition tpos;
  PetscInt           gid,lid;
#else
  PetscInt           N = mat->cmap->N,*indices;
#endif

  PetscFunctionBegin;

#if defined (PETSC_USE_CTABLE)
  /* use a table to map the global column numbers appearing in B to contiguous local ids */
  ierr = PetscTableCreate(aij->B->rmap->n,mat->cmap->N+1,&gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<aij->B->rmap->n; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt data,gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
      if (!data) {
        /* one based table */
        ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr);
      }
    }
  }
  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
  while (tpos) {
    ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
    gid--;
    lid--;
    garray[lid] = gid;
  }
  ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); /* sort, and rebuild the table in the sorted ordering */
  ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
  /* compact out the extra columns in B */
  for (i=0; i<aij->B->rmap->n; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
      lid--;
      aj[B->i[i] + j] = lid;
    }
  }
  aij->B->cmap->n = aij->B->cmap->N = ec;
  ierr = PetscLayoutSetUp((aij->B->cmap));CHKERRQ(ierr);
  ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr);
#else
  /* Make an array as long as the number of columns */
  /* mark those columns that are in aij->B */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<aij->B->rmap->n; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ec   = 0;
  for (i=0; i<N; i++) {
    if (indices[i]) garray[ec++] = i;
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) {
    indices[garray[i]] = i;
  }

  /* compact out the extra columns in B */
  for (i=0; i<aij->B->rmap->n; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
    }
  }
  aij->B->cmap->n = aij->B->cmap->N = ec;
  ierr = PetscLayoutSetUp((aij->B->cmap));CHKERRQ(ierr);
  ierr = PetscFree(indices);CHKERRQ(ierr);
#endif
  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec,&aij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets used to build the scatter-gather context */
  /* check for the special case where blocks are communicated for faster VecScatterXXX */
  useblockis = PETSC_FALSE;
  if (mat->cmap->bs > 1) {
    PetscInt bs = mat->cmap->bs,ibs,ga;
    if (!(ec % bs)) {
      useblockis = PETSC_TRUE;
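      /*
         Illustration (values are hypothetical, not from any particular run): with bs = 2 the
         loop below accepts garray = {4,5,10,11}, since every block starts at a multiple of bs
         and its entries are consecutive, but rejects garray = {4,5,10,12} and {3,4,10,11}.
      */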
      for (i=0; i<ec/bs; i++) {
        if ((ga = garray[ibs = i*bs]) % bs) {
          useblockis = PETSC_FALSE;
          break;
        }
        for (j=1; j<bs; j++) {
          if (garray[ibs+j] != ga+j) {
            useblockis = PETSC_FALSE;
            break;
          }
        }
        if (!useblockis) break;
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  i    = (PetscInt)useblockis;
  ierr = MPI_Allreduce(&i,&j,1,MPIU_INT,MPI_MIN,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (j != i) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Use of block index sets is not consistent across processes (this process is using blocks)");
#endif

  if (useblockis) {
    PetscInt *ga,bs = mat->cmap->bs,iec = ec/bs;
    if (ec % bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"ec %D is not divisible by bs %D",ec,bs);
    ierr = PetscInfo(mat,"Using block index set to define scatter\n");CHKERRQ(ierr);
    ierr = PetscMalloc(iec*sizeof(PetscInt),&ga);CHKERRQ(ierr);
    for (i=0; i<iec; i++) ga[i] = garray[i*bs]/bs;
    ierr = ISCreateBlock(((PetscObject)mat)->comm,bs,iec,ga,PETSC_OWN_POINTER,&from);CHKERRQ(ierr);
  } else {
    ierr = ISCreateGeneral(((PetscObject)mat)->comm,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
  }

  ierr = ISCreateStride(PETSC_COMM_SELF,ec,0,1,&to);CHKERRQ(ierr);

  /* create temporary global vector to generate scatter context */
  /* this does not allocate the array's memory, so it is inexpensive */
  ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,1,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr);

  /* generate the scatter context */
  ierr = VecScatterCreate(gvec,from,aij->lvec,to,&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,aij->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
  aij->garray = garray;
  ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = ISDestroy(&from);CHKERRQ(ierr);
  ierr = ISDestroy(&to);CHKERRQ(ierr);
  ierr = VecDestroy(&gvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDisAssemble_MPIAIJ"
/*
   Takes the local part of an already assembled MPIAIJ matrix
   and disassembles it. This is to allow new nonzeros into the matrix
   that require more communication in the matrix vector multiply.
   Thus certain data-structures must be rebuilt.

   Kind of slow! But that's what application programmers get when
   they are sloppy.
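
   For example (hypothetical values), if this process's garray is {3,7,12}, an entry stored
   in the off-diagonal block B at local column 1 corresponds to global column 7; the rebuilt
   Bnew below uses the full global column space, so that entry is reinserted at column 7.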
*/
PetscErrorCode MatDisAssemble_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  Mat            B = aij->B,Bnew;
  Mat_SeqAIJ     *Baij = (Mat_SeqAIJ*)B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,m = B->rmap->n,n = A->cmap->N,col,ct = 0,*garray = aij->garray,*nz,ec;
  PetscScalar    v;

  PetscFunctionBegin;
  /* free stuff related to the matrix-vector multiply */
  ierr = VecGetSize(aij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  if (aij->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
    ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(A,-aij->B->cmap->n*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  }

  /* make sure that B is assembled so we can access its values */
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* invent new B and copy stuff over */
  ierr = PetscMalloc((m+1)*sizeof(PetscInt),&nz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    nz[i] = Baij->i[i+1] - Baij->i[i];
  }
  ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr);
  ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr);
  ierr = MatSetBlockSizes(Bnew,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
  ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(Bnew,0,nz);CHKERRQ(ierr);
  ((Mat_SeqAIJ*)Bnew->data)->nonew = Baij->nonew; /* Inherit insertion error options. */
  ierr = PetscFree(nz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    for (j=Baij->i[i]; j<Baij->i[i+1]; j++) {
      col  = garray[Baij->j[ct]];
      v    = Baij->a[ct++];
      ierr = MatSetValues(Bnew,1,&i,1,&col,&v,B->insertmode);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr);
  aij->B           = Bnew;
  A->was_assembled = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* ugly stuff added for Glenn; someday we should fix this up */

static PetscInt *auglyrmapd = 0,*auglyrmapo = 0; /* mapping from the local ordering to the "diagonal"
                                                    and "off-diagonal" parts of the local matrix */
static Vec      auglydd = 0,auglyoo = 0;         /* work vectors used to scale the two parts of the local matrix */

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJDiagonalScaleLocalSetUp"
PetscErrorCode MatMPIAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale)
{
  Mat_MPIAIJ     *ina = (Mat_MPIAIJ*) inA->data; /* access private part of matrix */
  PetscErrorCode ierr;
  PetscInt       i,n,nt,cstart,cend,no,*garray = ina->garray,*lindices;
  PetscInt       *r_rmapd,*r_rmapo;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr);
  ierr = MatGetSize(ina->A,PETSC_NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc((inA->rmap->mapping->n+1)*sizeof(PetscInt),&r_rmapd);CHKERRQ(ierr);
  ierr = PetscMemzero(r_rmapd,inA->rmap->mapping->n*sizeof(PetscInt));CHKERRQ(ierr);
  nt   = 0;
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (inA->rmap->mapping->indices[i] >= cstart && inA->rmap->mapping->indices[i] < cend) {
      nt++;
      r_rmapd[i] = inA->rmap->mapping->indices[i] + 1;
    }
  }
  if (nt != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D n %D",nt,n);
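  /*
     auglyrmapd[g-cstart] records, for each locally owned global index g, its position in
     inA's local-to-global mapping; it is used later to gather the matching entries of the
     local scale vector into auglydd. For example (hypothetical values), with cstart = 4,
     cend = 6 and mapping indices {9,4,5,12}, the owned indices 4 and 5 sit at mapping
     positions 1 and 2, so auglyrmapd = {1,2}.
  */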
  ierr = PetscMalloc((n+1)*sizeof(PetscInt),&auglyrmapd);CHKERRQ(ierr);
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (r_rmapd[i]) {
      auglyrmapd[(r_rmapd[i]-1)-cstart] = i;
    }
  }
  ierr = PetscFree(r_rmapd);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF,n,&auglydd);CHKERRQ(ierr);

  ierr = PetscMalloc((inA->cmap->N+1)*sizeof(PetscInt),&lindices);CHKERRQ(ierr);
  ierr = PetscMemzero(lindices,inA->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<ina->B->cmap->n; i++) {
    lindices[garray[i]] = i+1;
  }
  no   = inA->rmap->mapping->n - nt;
  ierr = PetscMalloc((inA->rmap->mapping->n+1)*sizeof(PetscInt),&r_rmapo);CHKERRQ(ierr);
  ierr = PetscMemzero(r_rmapo,inA->rmap->mapping->n*sizeof(PetscInt));CHKERRQ(ierr);
  nt   = 0;
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (lindices[inA->rmap->mapping->indices[i]]) {
      nt++;
      r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]];
    }
  }
  if (nt > no) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,no);
  ierr = PetscFree(lindices);CHKERRQ(ierr);
  ierr = PetscMalloc((nt+1)*sizeof(PetscInt),&auglyrmapo);CHKERRQ(ierr);
  for (i=0; i<inA->rmap->mapping->n; i++) {
    if (r_rmapo[i]) {
      auglyrmapo[(r_rmapo[i]-1)] = i;
    }
  }
  ierr = PetscFree(r_rmapo);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF,nt,&auglyoo);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJDiagonalScaleLocal"
PetscErrorCode MatMPIAIJDiagonalScaleLocal(Mat A,Vec scale)
{
  /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(A,"MatDiagonalScaleLocal_C",(Mat,Vec),(A,scale));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScaleLocal_MPIAIJ"
PetscErrorCode MatDiagonalScaleLocal_MPIAIJ(Mat A,Vec scale)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data; /* access private part of matrix */
  PetscErrorCode ierr;
  PetscInt       n,i;
  PetscScalar    *d,*o,*s;

  PetscFunctionBegin;
  if (!auglyrmapd) {
    ierr = MatMPIAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr);
  }

  ierr = VecGetArray(scale,&s);CHKERRQ(ierr);

  ierr = VecGetLocalSize(auglydd,&n);CHKERRQ(ierr);
  ierr = VecGetArray(auglydd,&d);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    d[i] = s[auglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */
  }
  ierr = VecRestoreArray(auglydd,&d);CHKERRQ(ierr);
  /* column scale "diagonal" portion of local matrix */
  ierr = MatDiagonalScale(a->A,PETSC_NULL,auglydd);CHKERRQ(ierr);

  ierr = VecGetLocalSize(auglyoo,&n);CHKERRQ(ierr);
  ierr = VecGetArray(auglyoo,&o);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    o[i] = s[auglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
  }
  ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr);
  ierr = VecRestoreArray(auglyoo,&o);CHKERRQ(ierr);
  /* column scale "off-diagonal" portion of local matrix */
  ierr = MatDiagonalScale(a->B,PETSC_NULL,auglyoo);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END
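
/*
   Rough usage sketch (assuming the caller has attached a local-to-global mapping to A,
   since the routines above rely on A->rmap->mapping):

     MatSetLocalToGlobalMapping(A,map,map);
     ... set values and assemble A ...
     MatMPIAIJDiagonalScaleLocal(A,scale);    scale is a sequential Vec in the local ordering

   The first call builds auglyrmapd/auglyrmapo and the work vectors; later calls reuse them.
*/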