#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc-private/vecimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also
   automatically switches over to using inodes when enough of them exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/
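/*
   A minimal usage sketch (illustration only, not part of this file's implementation):
   creating an AIJ matrix and calling both preallocation routines as the manual page
   above recommends; whichever routine does not match the communicator size is ignored.
   The names local_rows, local_cols, d_nz, and o_nz are placeholders for this sketch.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,local_rows,local_cols,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,d_nz,NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);CHKERRQ(ierr);
*/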
#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}
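/*
   Usage sketch for the column-norm routine above (through the public entry point
   MatGetColumnNorms(), which dispatches here for MATMPIAIJ): norms must have room for
   all N global columns on every process, since the result is combined with an
   Allreduce.

     PetscReal *norms;
     PetscInt  N;
     ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
     ierr = PetscMalloc1(N,&norms);CHKERRQ(ierr);
     ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
     ierr = PetscFree(norms);CHKERRQ(ierr);
*/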
#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    Used by a preconditioner, hence PETSC_EXTERN
*/
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal entries in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
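/*
   A sketch of how a caller (for example a preconditioner, per the note above) might
   use MatDistribute_MPIAIJ(): gmat is a square MATSEQAIJ matrix held on rank 0, m is
   the number of locally owned rows on this process, and the routine is re-entered with
   MAT_REUSE_MATRIX when only the numerical values of gmat have changed.

     Mat dmat;
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);
     ... change numerical values of gmat on rank 0 ...
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&dmat);CHKERRQ(ierr);
*/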
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it, it is not scalable (each processor
  has an order-N integer array) but it is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
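/*
   The colmap built above is consumed as in MatSetValues_MPIAIJ() below; gcol is a
   placeholder for a global column index. Entries are stored shifted by one so that a
   lookup result of 0 can mean "not present":

     PetscInt lcol;
   #if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif

   A negative lcol means the global column has no off-diagonal entry on this process.
*/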
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
  { \
    if (col <= lastcol1)  low1 = 0; \
    else                 high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col;  \
    ap1[_i] = value;  \
    A->nonzerostate++;\
    a_noinsert: ; \
    ailen[row] = nrow1; \
  }

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
  { \
    if (col <= lastcol2)  low2 = 0; \
    else                 high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
  }
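/*
   A standalone sketch (illustration only) of the search-and-insert technique the two
   macros above implement: a short binary search narrows the window, a linear scan
   locates the column, and later entries are shifted up to open a slot for a new
   nonzero. The helper name InsertSortedSketch and the assumption that cols/vals have
   spare capacity are ours, not PETSc's.

     static void InsertSortedSketch(PetscInt *cols,MatScalar *vals,PetscInt *nrow,PetscInt col,MatScalar v)
     {
       PetscInt lo = 0,hi = *nrow,i,k;
       while (hi-lo > 5) {
         PetscInt t = (lo+hi)/2;
         if (cols[t] > col) hi = t;
         else               lo = t;
       }
       for (i=lo; i<hi; i++) {
         if (cols[i] >= col) break;
       }
       if (i < *nrow && cols[i] == col) { vals[i] += v; return; }
       for (k=*nrow; k>i; k--) { cols[k] = cols[k-1]; vals[k] = vals[k-1]; }
       cols[i] = col; vals[i] = v; (*nrow)++;
     }
*/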
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
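/*
   Usage sketch for the routine above via its public entry point MatSetValuesRow(): v
   must supply a value for every stored nonzero of the locally owned global row grow,
   ordered by increasing global column index, so the three PetscMemcpy() calls above
   can split it into the left-of-diagonal, diagonal-block, and right-of-diagonal
   pieces.

     ierr = MatSetValuesRow(A,grow,v);CHKERRQ(ierr);
*/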
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some variables required by the macros */
  Mat        A      = aij->A;
  Mat_SeqAIJ *a     = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa    = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B      = aij->B;
  Mat_SeqAIJ *b     = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba    = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
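/*
   Caller-side sketch of the insertion path implemented above: locally owned entries go
   directly into the diagonal (A) or off-diagonal (B) block, while off-process entries
   are stashed and exchanged during assembly. grow, gcol, and value are placeholders.

     ierr = MatSetValues(mat,1,&grow,1,&gcol,&value,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/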
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
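/*
   Because the routine above supports only locally owned rows, a caller restricts its
   queries to the ownership range first; here row must satisfy rstart <= row < rend,
   and ncols, cols, vals are placeholders:

     PetscInt rstart,rend;
     ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
     ierr = MatGetValues(mat,1,&row,ncols,cols,vals);CHKERRQ(ierr);
*/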
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, so we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ     *mat    = (Mat_MPIAIJ *) A->data;
  PetscInt       *owners = A->rmap->range;
  PetscInt       n       = A->rmap->n;
  PetscMPIInt    size    = mat->size;
  PetscSF        sf;
  PetscInt       *lrows;
  PetscSFNode    *rrows;
  PetscInt       lastidx = -1, r, p = 0, len = 0;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    PetscBool      found = PETSC_FALSE;
    /* Trick for efficient searching for sorted rows */
    if (lastidx > idx) p = 0;
    lastidx = idx;
    for (; p < size; ++p) {
      if (idx >= owners[p] && idx < owners[p+1]) {
        rrows[r].rank  = p;
        rrows[r].index = rows[r] - owners[p];
        found = PETSC_TRUE;
        break;
      }
    }
    if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }
  /* Must zero mat->B before mat->A because the (diag) case below may put values into mat->B */
  ierr = MatZeroRows(mat->B, len, lrows, 0.0, 0, 0);CHKERRQ(ierr);
  if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
    PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
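/*
   A typical Dirichlet boundary-condition sketch using the routine above through the
   public MatZeroRows(): zero the listed rows, place 1.0 on their diagonals, and adjust
   the right-hand side b from the known values in x. nrows and rows are placeholders.

     ierr = MatZeroRows(A,nrows,rows,1.0,x,b);CHKERRQ(ierr);
*/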
#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,n = A->rmap->n,lastidx = -1;
  PetscInt          i,j,r,m,p = 0,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;
#if defined(PETSC_DEBUG)
  PetscBool found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    PetscBool      found = PETSC_FALSE;
    /* Trick for efficient searching for sorted rows */
    if (lastidx > idx) p = 0;
    lastidx = idx;
    for (; p < size; ++p) {
      if (idx >= owners[p] && idx < owners[p+1]) {
        rrows[r].rank  = p;
        rrows[r].index = rows[r] - owners[p];
        found = PETSC_TRUE;
        break;
      }
    }
    if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off diagonal part of matrix */
  ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  /* loop over all elements of off process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually added into yy until the VecScatterEnd() below */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
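/*
   The parallel products above (MatMult, MatMultAdd, MatMultTranspose) share one
   overlap pattern: start the ghost-value scatter, apply the local diagonal block while
   messages are in flight, finish the scatter, then fold in the off-diagonal block.
   From the caller's side this is simply

     ierr = MatMult(A,x,y);CHKERRQ(ierr);
     ierr = MatMultTransposeAdd(A,x,y,z);CHKERRQ(ierr);

   which compute y = A x and z = y + A' x respectively.
*/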
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff, Boff, *Aoffs, *Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc1((N-last+first),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
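/*
   Since the routine above requires matching row and column layouts, the diagonal
   vector is most safely created from the matrix itself:

     Vec d;
     ierr = MatGetVecs(A,&d,NULL);CHKERRQ(ierr);
     ierr = MatGetDiagonal(A,d);CHKERRQ(ierr);
*/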
#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_Redundant"
PetscErrorCode MatDestroy_Redundant(Mat_Redundant **redundant)
{
  PetscErrorCode ierr;
  Mat_Redundant  *redund = *redundant;
  PetscInt       i;

  PetscFunctionBegin;
  *redundant = NULL;
  if (redund) {
    if (redund->matseq) { /* via MatGetSubMatrices() */
      ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr);
      ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr);
      ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr);
      ierr = PetscFree(redund->matseq);CHKERRQ(ierr);
    } else {
      ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
      ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
      ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
      for (i=0; i<redund->nrecvs; i++) {
        ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
        ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
      }
      ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
    }

    if (redund->psubcomm) {
      ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr);
    }
    ierr = PetscFree(redund);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatDestroy_Redundant(&aij->redundant);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;
  FILE           *file;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* on rank 0 the MPI_Reduce() below sets this to the largest nz any process holds; elsewhere the local nz suffices */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
    for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
    for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
  PetscFunctionReturn(0);
}
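/*
   Caller-side sketch matching the binary writer above: open a binary viewer and view
   the matrix; the -matload_block_size line written to the companion .info file is
   later picked up by MatLoad(). The file name "mat.dat" is a placeholder.

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"mat.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(mat,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/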
#include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscBool inodes;

      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar  *a;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++;
      a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m    = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++;
      a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
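/*
   Sketch of requesting the ASCII info formats handled above:

     ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO);CHKERRQ(ierr);
     ierr = MatView(mat,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
     ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
*/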
*/ 1393 Mat A; 1394 Mat_SeqAIJ *Aloc; 1395 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1396 MatScalar *a; 1397 1398 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1399 if (!rank) { 1400 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1401 } else { 1402 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1403 } 1404 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1405 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1406 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1407 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1408 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1409 1410 /* copy over the A part */ 1411 Aloc = (Mat_SeqAIJ*)aij->A->data; 1412 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1413 row = mat->rmap->rstart; 1414 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1415 for (i=0; i<m; i++) { 1416 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1417 row++; 1418 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1419 } 1420 aj = Aloc->j; 1421 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1422 1423 /* copy over the B part */ 1424 Aloc = (Mat_SeqAIJ*)aij->B->data; 1425 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1426 row = mat->rmap->rstart; 1427 ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr); 1428 ct = cols; 1429 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1430 for (i=0; i<m; i++) { 1431 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1432 row++; 1433 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1434 } 1435 ierr = PetscFree(ct);CHKERRQ(ierr); 1436 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1437 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1438 /* 1439 Everyone has to call to draw the matrix since the graphics waits are 1440 synchronized across all processors that share the PetscDraw object 1441 */ 1442 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1443 if (!rank) { 1444 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1445 } 1446 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1447 ierr = MatDestroy(&A);CHKERRQ(ierr); 1448 } 1449 PetscFunctionReturn(0); 1450 } 1451 1452 #undef __FUNCT__ 1453 #define __FUNCT__ "MatView_MPIAIJ" 1454 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1455 { 1456 PetscErrorCode ierr; 1457 PetscBool iascii,isdraw,issocket,isbinary; 1458 1459 PetscFunctionBegin; 1460 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1461 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1462 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1463 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1464 if (iascii || isdraw || isbinary || issocket) { 1465 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1466 } 1467 PetscFunctionReturn(0); 1468 } 1469 1470 #undef __FUNCT__ 1471 #define __FUNCT__ "MatSOR_MPIAIJ" 1472 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1473 { 1474 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1475 PetscErrorCode ierr; 1476 Vec bb1 = 0; 1477 PetscBool hasop; 1478 1479 PetscFunctionBegin; 1480 if (flag == SOR_APPLY_UPPER) { 1481 ierr 
= (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1482 PetscFunctionReturn(0); 1483 } 1484 1485 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1486 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1487 } 1488 1489 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1490 if (flag & SOR_ZERO_INITIAL_GUESS) { 1491 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1492 its--; 1493 } 1494 1495 while (its--) { 1496 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1497 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1498 1499 /* update rhs: bb1 = bb - B*x */ 1500 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1501 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1502 1503 /* local sweep */ 1504 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1505 } 1506 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1507 if (flag & SOR_ZERO_INITIAL_GUESS) { 1508 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1509 its--; 1510 } 1511 while (its--) { 1512 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1513 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1514 1515 /* update rhs: bb1 = bb - B*x */ 1516 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1517 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1518 1519 /* local sweep */ 1520 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1521 } 1522 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1523 if (flag & SOR_ZERO_INITIAL_GUESS) { 1524 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1525 its--; 1526 } 1527 while (its--) { 1528 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1529 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1530 1531 /* update rhs: bb1 = bb - B*x */ 1532 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1533 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1534 1535 /* local sweep */ 1536 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1537 } 1538 } else if (flag & SOR_EISENSTAT) { 1539 Vec xx1; 1540 1541 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1542 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1543 1544 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1545 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1546 if (!mat->diag) { 1547 ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1548 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1549 } 1550 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1551 if (hasop) { 1552 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1553 } else { 1554 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1555 } 1556 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1557 1558 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1559 1560 /* local sweep */ 1561 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | 
SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1562 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1563 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1564 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1565 1566 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1567 PetscFunctionReturn(0); 1568 } 1569 1570 #undef __FUNCT__ 1571 #define __FUNCT__ "MatPermute_MPIAIJ" 1572 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1573 { 1574 Mat aA,aB,Aperm; 1575 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1576 PetscScalar *aa,*ba; 1577 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1578 PetscSF rowsf,sf; 1579 IS parcolp = NULL; 1580 PetscBool done; 1581 PetscErrorCode ierr; 1582 1583 PetscFunctionBegin; 1584 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1585 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1586 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1587 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1588 1589 /* Invert row permutation to find out where my rows should go */ 1590 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1591 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1592 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1593 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1594 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1595 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1596 1597 /* Invert column permutation to find out where my columns should go */ 1598 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1599 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1600 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1601 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1602 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1603 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1604 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1605 1606 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1607 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1608 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1609 1610 /* Find out where my gcols should go */ 1611 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1612 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1613 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1614 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1615 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1616 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1617 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1618 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1619 1620 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1621 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1622 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1623 for (i=0; i<m; i++) { 1624 PetscInt row = rdest[i],rowner; 1625 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1626 for (j=ai[i]; j<ai[i+1]; j++) { 1627 PetscInt cowner,col = cdest[aj[j]]; 1628 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1629 if (rowner == cowner) dnnz[i]++; 1630 
else onnz[i]++; 1631 } 1632 for (j=bi[i]; j<bi[i+1]; j++) { 1633 PetscInt cowner,col = gcdest[bj[j]]; 1634 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1635 if (rowner == cowner) dnnz[i]++; 1636 else onnz[i]++; 1637 } 1638 } 1639 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1640 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1641 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1642 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1643 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1644 1645 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1646 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1647 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1648 for (i=0; i<m; i++) { 1649 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1650 PetscInt j0,rowlen; 1651 rowlen = ai[i+1] - ai[i]; 1652 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1653 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1654 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1655 } 1656 rowlen = bi[i+1] - bi[i]; 1657 for (j0=j=0; j<rowlen; j0=j) { 1658 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1659 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1660 } 1661 } 1662 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1663 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1664 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1665 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1666 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1667 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1668 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1669 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1670 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1671 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1672 *B = Aperm; 1673 PetscFunctionReturn(0); 1674 } 1675 1676 #undef __FUNCT__ 1677 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1678 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1679 { 1680 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1681 Mat A = mat->A,B = mat->B; 1682 PetscErrorCode ierr; 1683 PetscReal isend[5],irecv[5]; 1684 1685 PetscFunctionBegin; 1686 info->block_size = 1.0; 1687 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1688 1689 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1690 isend[3] = info->memory; isend[4] = info->mallocs; 1691 1692 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1693 1694 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1695 isend[3] += info->memory; isend[4] += info->mallocs; 1696 if (flag == MAT_LOCAL) { 1697 info->nz_used = isend[0]; 1698 info->nz_allocated = isend[1]; 1699 info->nz_unneeded = isend[2]; 1700 info->memory = isend[3]; 1701 info->mallocs = isend[4]; 1702 } else if (flag == MAT_GLOBAL_MAX) { 1703 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1704 1705 info->nz_used = irecv[0]; 1706 info->nz_allocated = irecv[1]; 1707 info->nz_unneeded = irecv[2]; 1708 info->memory = irecv[3]; 1709 info->mallocs = irecv[4]; 1710 } else 
if (flag == MAT_GLOBAL_SUM) { 1711 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1712 1713 info->nz_used = irecv[0]; 1714 info->nz_allocated = irecv[1]; 1715 info->nz_unneeded = irecv[2]; 1716 info->memory = irecv[3]; 1717 info->mallocs = irecv[4]; 1718 } 1719 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1720 info->fill_ratio_needed = 0; 1721 info->factor_mallocs = 0; 1722 PetscFunctionReturn(0); 1723 } 1724 1725 #undef __FUNCT__ 1726 #define __FUNCT__ "MatSetOption_MPIAIJ" 1727 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1728 { 1729 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1730 PetscErrorCode ierr; 1731 1732 PetscFunctionBegin; 1733 switch (op) { 1734 case MAT_NEW_NONZERO_LOCATIONS: 1735 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1736 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1737 case MAT_KEEP_NONZERO_PATTERN: 1738 case MAT_NEW_NONZERO_LOCATION_ERR: 1739 case MAT_USE_INODES: 1740 case MAT_IGNORE_ZERO_ENTRIES: 1741 MatCheckPreallocated(A,1); 1742 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1743 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1744 break; 1745 case MAT_ROW_ORIENTED: 1746 a->roworiented = flg; 1747 1748 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1749 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1750 break; 1751 case MAT_NEW_DIAGONALS: 1752 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1753 break; 1754 case MAT_IGNORE_OFF_PROC_ENTRIES: 1755 a->donotstash = flg; 1756 break; 1757 case MAT_SPD: 1758 A->spd_set = PETSC_TRUE; 1759 A->spd = flg; 1760 if (flg) { 1761 A->symmetric = PETSC_TRUE; 1762 A->structurally_symmetric = PETSC_TRUE; 1763 A->symmetric_set = PETSC_TRUE; 1764 A->structurally_symmetric_set = PETSC_TRUE; 1765 } 1766 break; 1767 case MAT_SYMMETRIC: 1768 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1769 break; 1770 case MAT_STRUCTURALLY_SYMMETRIC: 1771 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1772 break; 1773 case MAT_HERMITIAN: 1774 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1775 break; 1776 case MAT_SYMMETRY_ETERNAL: 1777 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1778 break; 1779 default: 1780 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1781 } 1782 PetscFunctionReturn(0); 1783 } 1784 1785 #undef __FUNCT__ 1786 #define __FUNCT__ "MatGetRow_MPIAIJ" 1787 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1788 { 1789 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1790 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1791 PetscErrorCode ierr; 1792 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1793 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1794 PetscInt *cmap,*idx_p; 1795 1796 PetscFunctionBegin; 1797 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1798 mat->getrowactive = PETSC_TRUE; 1799 1800 if (!mat->rowvalues && (idx || v)) { 1801 /* 1802 allocate enough space to hold information from the longest row. 
1803 */ 1804 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1805 PetscInt max = 1,tmp; 1806 for (i=0; i<matin->rmap->n; i++) { 1807 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1808 if (max < tmp) max = tmp; 1809 } 1810 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1811 } 1812 1813 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1814 lrow = row - rstart; 1815 1816 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1817 if (!v) {pvA = 0; pvB = 0;} 1818 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1819 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1820 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1821 nztot = nzA + nzB; 1822 1823 cmap = mat->garray; 1824 if (v || idx) { 1825 if (nztot) { 1826 /* Sort by increasing column numbers, assuming A and B already sorted */ 1827 PetscInt imark = -1; 1828 if (v) { 1829 *v = v_p = mat->rowvalues; 1830 for (i=0; i<nzB; i++) { 1831 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1832 else break; 1833 } 1834 imark = i; 1835 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1836 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1837 } 1838 if (idx) { 1839 *idx = idx_p = mat->rowindices; 1840 if (imark > -1) { 1841 for (i=0; i<imark; i++) { 1842 idx_p[i] = cmap[cworkB[i]]; 1843 } 1844 } else { 1845 for (i=0; i<nzB; i++) { 1846 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1847 else break; 1848 } 1849 imark = i; 1850 } 1851 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1852 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1853 } 1854 } else { 1855 if (idx) *idx = 0; 1856 if (v) *v = 0; 1857 } 1858 } 1859 *nz = nztot; 1860 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1861 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1862 PetscFunctionReturn(0); 1863 } 1864 1865 #undef __FUNCT__ 1866 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1867 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1868 { 1869 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1870 1871 PetscFunctionBegin; 1872 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1873 aij->getrowactive = PETSC_FALSE; 1874 PetscFunctionReturn(0); 1875 } 1876 1877 #undef __FUNCT__ 1878 #define __FUNCT__ "MatNorm_MPIAIJ" 1879 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1880 { 1881 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1882 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1883 PetscErrorCode ierr; 1884 PetscInt i,j,cstart = mat->cmap->rstart; 1885 PetscReal sum = 0.0; 1886 MatScalar *v; 1887 1888 PetscFunctionBegin; 1889 if (aij->size == 1) { 1890 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1891 } else { 1892 if (type == NORM_FROBENIUS) { 1893 v = amat->a; 1894 for (i=0; i<amat->nz; i++) { 1895 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1896 } 1897 v = bmat->a; 1898 for (i=0; i<bmat->nz; i++) { 1899 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1900 } 1901 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1902 *norm = PetscSqrtReal(*norm); 1903 } else if (type == NORM_1) { /* max column norm */ 1904 PetscReal *tmp,*tmp2; 1905 PetscInt *jj,*garray = aij->garray; 1906 ierr = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr); 1907 ierr = 
PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr); 1908 *norm = 0.0; 1909 v = amat->a; jj = amat->j; 1910 for (j=0; j<amat->nz; j++) { 1911 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1912 } 1913 v = bmat->a; jj = bmat->j; 1914 for (j=0; j<bmat->nz; j++) { 1915 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1916 } 1917 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1918 for (j=0; j<mat->cmap->N; j++) { 1919 if (tmp2[j] > *norm) *norm = tmp2[j]; 1920 } 1921 ierr = PetscFree(tmp);CHKERRQ(ierr); 1922 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1923 } else if (type == NORM_INFINITY) { /* max row norm */ 1924 PetscReal ntemp = 0.0; 1925 for (j=0; j<aij->A->rmap->n; j++) { 1926 v = amat->a + amat->i[j]; 1927 sum = 0.0; 1928 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1929 sum += PetscAbsScalar(*v); v++; 1930 } 1931 v = bmat->a + bmat->i[j]; 1932 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1933 sum += PetscAbsScalar(*v); v++; 1934 } 1935 if (sum > ntemp) ntemp = sum; 1936 } 1937 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1938 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1939 } 1940 PetscFunctionReturn(0); 1941 } 1942 1943 #undef __FUNCT__ 1944 #define __FUNCT__ "MatTranspose_MPIAIJ" 1945 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1946 { 1947 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1948 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1949 PetscErrorCode ierr; 1950 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1951 PetscInt cstart = A->cmap->rstart,ncol; 1952 Mat B; 1953 MatScalar *array; 1954 1955 PetscFunctionBegin; 1956 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1957 1958 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1959 ai = Aloc->i; aj = Aloc->j; 1960 bi = Bloc->i; bj = Bloc->j; 1961 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1962 PetscInt *d_nnz,*g_nnz,*o_nnz; 1963 PetscSFNode *oloc; 1964 PETSC_UNUSED PetscSF sf; 1965 1966 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1967 /* compute d_nnz for preallocation */ 1968 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1969 for (i=0; i<ai[ma]; i++) { 1970 d_nnz[aj[i]]++; 1971 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1972 } 1973 /* compute local off-diagonal contributions */ 1974 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1975 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1976 /* map those to global */ 1977 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1978 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1979 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1980 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1981 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1982 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1983 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1984 1985 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1986 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1987 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1988 ierr = 
MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1989 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1990 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 1991 } else { 1992 B = *matout; 1993 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1994 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1995 } 1996 1997 /* copy over the A part */ 1998 array = Aloc->a; 1999 row = A->rmap->rstart; 2000 for (i=0; i<ma; i++) { 2001 ncol = ai[i+1]-ai[i]; 2002 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2003 row++; 2004 array += ncol; aj += ncol; 2005 } 2006 aj = Aloc->j; 2007 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2008 2009 /* copy over the B part */ 2010 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 2011 array = Bloc->a; 2012 row = A->rmap->rstart; 2013 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2014 cols_tmp = cols; 2015 for (i=0; i<mb; i++) { 2016 ncol = bi[i+1]-bi[i]; 2017 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2018 row++; 2019 array += ncol; cols_tmp += ncol; 2020 } 2021 ierr = PetscFree(cols);CHKERRQ(ierr); 2022 2023 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2024 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2025 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2026 *matout = B; 2027 } else { 2028 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2029 } 2030 PetscFunctionReturn(0); 2031 } 2032 2033 #undef __FUNCT__ 2034 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2035 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2036 { 2037 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2038 Mat a = aij->A,b = aij->B; 2039 PetscErrorCode ierr; 2040 PetscInt s1,s2,s3; 2041 2042 PetscFunctionBegin; 2043 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2044 if (rr) { 2045 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2046 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2047 /* Overlap communication with computation. 
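       The forward scatter of rr into aij->lvec is started here, the diagonal
       block is scaled while the messages are in flight, and the scatter is
       only completed below, immediately before the off-diagonal block needs
       the remote values.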
*/ 2048 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2049 } 2050 if (ll) { 2051 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2052 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2053 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2054 } 2055 /* scale the diagonal block */ 2056 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2057 2058 if (rr) { 2059 /* Do a scatter end and then right scale the off-diagonal block */ 2060 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2061 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2062 } 2063 PetscFunctionReturn(0); 2064 } 2065 2066 #undef __FUNCT__ 2067 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2068 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2069 { 2070 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2071 PetscErrorCode ierr; 2072 2073 PetscFunctionBegin; 2074 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2075 PetscFunctionReturn(0); 2076 } 2077 2078 #undef __FUNCT__ 2079 #define __FUNCT__ "MatEqual_MPIAIJ" 2080 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2081 { 2082 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2083 Mat a,b,c,d; 2084 PetscBool flg; 2085 PetscErrorCode ierr; 2086 2087 PetscFunctionBegin; 2088 a = matA->A; b = matA->B; 2089 c = matB->A; d = matB->B; 2090 2091 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2092 if (flg) { 2093 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2094 } 2095 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2096 PetscFunctionReturn(0); 2097 } 2098 2099 #undef __FUNCT__ 2100 #define __FUNCT__ "MatCopy_MPIAIJ" 2101 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2102 { 2103 PetscErrorCode ierr; 2104 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2105 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2106 2107 PetscFunctionBegin; 2108 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2109 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2110 /* because of the column compression in the off-processor part of the matrix a->B, 2111 the number of columns in a->B and b->B may be different, hence we cannot call 2112 the MatCopy() directly on the two parts. If need be, we can provide a more 2113 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2114 then copying the submatrices */ 2115 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2116 } else { 2117 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2118 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2119 } 2120 PetscFunctionReturn(0); 2121 } 2122 2123 #undef __FUNCT__ 2124 #define __FUNCT__ "MatSetUp_MPIAIJ" 2125 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2126 { 2127 PetscErrorCode ierr; 2128 2129 PetscFunctionBegin; 2130 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2131 PetscFunctionReturn(0); 2132 } 2133 2134 /* 2135 Computes the number of nonzeros per row needed for preallocation when X and Y 2136 have different nonzero structure. 
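   A sketch of the counting: if row i of X holds global columns {0,3,7} and
   row i of Y holds {3,5}, the merged row is {0,3,5,7}, so nnz[i] = 4. The
   two-pointer loop below walks both sorted rows once, counting the union and
   skipping any column the rows share.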
2137 */ 2138 #undef __FUNCT__ 2139 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private" 2140 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2141 { 2142 PetscInt i,j,k,nzx,nzy; 2143 2144 PetscFunctionBegin; 2145 /* Set the number of nonzeros in the new matrix */ 2146 for (i=0; i<m; i++) { 2147 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2148 nzx = xi[i+1] - xi[i]; 2149 nzy = yi[i+1] - yi[i]; 2150 nnz[i] = 0; 2151 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2152 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2153 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2154 nnz[i]++; 2155 } 2156 for (; k<nzy; k++) nnz[i]++; 2157 } 2158 PetscFunctionReturn(0); 2159 } 2160 2161 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2162 #undef __FUNCT__ 2163 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2164 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2165 { 2166 PetscErrorCode ierr; 2167 PetscInt m = Y->rmap->N; 2168 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2169 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2170 2171 PetscFunctionBegin; 2172 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2173 PetscFunctionReturn(0); 2174 } 2175 2176 #undef __FUNCT__ 2177 #define __FUNCT__ "MatAXPY_MPIAIJ" 2178 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2179 { 2180 PetscErrorCode ierr; 2181 PetscInt i; 2182 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2183 PetscBLASInt bnz,one=1; 2184 Mat_SeqAIJ *x,*y; 2185 2186 PetscFunctionBegin; 2187 if (str == SAME_NONZERO_PATTERN) { 2188 PetscScalar alpha = a; 2189 x = (Mat_SeqAIJ*)xx->A->data; 2190 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2191 y = (Mat_SeqAIJ*)yy->A->data; 2192 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2193 x = (Mat_SeqAIJ*)xx->B->data; 2194 y = (Mat_SeqAIJ*)yy->B->data; 2195 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2196 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2197 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2198 } else if (str == SUBSET_NONZERO_PATTERN) { 2199 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2200 2201 x = (Mat_SeqAIJ*)xx->B->data; 2202 y = (Mat_SeqAIJ*)yy->B->data; 2203 if (y->xtoy && y->XtoY != xx->B) { 2204 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2205 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2206 } 2207 if (!y->xtoy) { /* get xtoy */ 2208 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2209 y->XtoY = xx->B; 2210 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2211 } 2212 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2213 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2214 } else { 2215 Mat B; 2216 PetscInt *nnz_d,*nnz_o; 2217 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2218 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2219 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2220 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2221 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2222 
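/* For a different nonzero pattern, B is a fresh matrix with Y's layout: the preallocation below merges the per-row nonzero counts of X and Y (diagonal and off-diagonal blocks separately), MatAXPY_BasicWithPreallocation() fills B, and MatHeaderReplace() then makes B the new Y. */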
ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2223 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2224 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2225 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2226 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2227 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2228 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2229 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2230 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2231 } 2232 PetscFunctionReturn(0); 2233 } 2234 2235 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2236 2237 #undef __FUNCT__ 2238 #define __FUNCT__ "MatConjugate_MPIAIJ" 2239 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2240 { 2241 #if defined(PETSC_USE_COMPLEX) 2242 PetscErrorCode ierr; 2243 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2244 2245 PetscFunctionBegin; 2246 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2247 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2248 #else 2249 PetscFunctionBegin; 2250 #endif 2251 PetscFunctionReturn(0); 2252 } 2253 2254 #undef __FUNCT__ 2255 #define __FUNCT__ "MatRealPart_MPIAIJ" 2256 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2257 { 2258 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2259 PetscErrorCode ierr; 2260 2261 PetscFunctionBegin; 2262 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2263 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2264 PetscFunctionReturn(0); 2265 } 2266 2267 #undef __FUNCT__ 2268 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2269 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2270 { 2271 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2272 PetscErrorCode ierr; 2273 2274 PetscFunctionBegin; 2275 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2276 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2277 PetscFunctionReturn(0); 2278 } 2279 2280 #if defined(PETSC_HAVE_PBGL) 2281 2282 #include <boost/parallel/mpi/bsp_process_group.hpp> 2283 #include <boost/graph/distributed/ilu_default_graph.hpp> 2284 #include <boost/graph/distributed/ilu_0_block.hpp> 2285 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2286 #include <boost/graph/distributed/petsc/interface.hpp> 2287 #include <boost/multi_array.hpp> 2288 #include <boost/parallel/distributed_property_map.hpp> 2289 2290 #undef __FUNCT__ 2291 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2292 /* 2293 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2294 */ 2295 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2296 { 2297 namespace petsc = boost::distributed::petsc; 2298 2299 namespace graph_dist = boost::graph::distributed; 2300 using boost::graph::distributed::ilu_default::process_group_type; 2301 using boost::graph::ilu_permuted; 2302 2303 PetscBool row_identity, col_identity; 2304 PetscContainer c; 2305 PetscInt m, n, M, N; 2306 PetscErrorCode ierr; 2307 2308 PetscFunctionBegin; 2309 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2310 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2311 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2312 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2313 2314 process_group_type pg; 2315 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2316 lgraph_type *lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg,
petsc::matrix_distribution(A, pg)); 2317 lgraph_type& level_graph = *lgraph_p; 2318 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2319 2320 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2321 ilu_permuted(level_graph); 2322 2323 /* put together the new matrix */ 2324 ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr); 2325 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2326 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2327 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2328 ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr); 2329 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2330 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2331 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2332 2333 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);CHKERRQ(ierr); 2334 ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr); 2335 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr); 2336 ierr = PetscContainerDestroy(&c);CHKERRQ(ierr); 2337 PetscFunctionReturn(0); 2338 } 2339 2340 #undef __FUNCT__ 2341 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2342 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2343 { 2344 PetscFunctionBegin; 2345 PetscFunctionReturn(0); 2346 } 2347 2348 #undef __FUNCT__ 2349 #define __FUNCT__ "MatSolve_MPIAIJ" 2350 /* 2351 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2352 */ 2353 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2354 { 2355 namespace graph_dist = boost::graph::distributed; 2356 2357 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2358 lgraph_type *lgraph_p; 2359 PetscContainer c; 2360 PetscErrorCode ierr; 2361 2362 PetscFunctionBegin; 2363 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr); 2364 ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr); 2365 ierr = VecCopy(b, x);CHKERRQ(ierr); 2366 2367 PetscScalar *array_x; 2368 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2369 PetscInt sx; 2370 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2371 2372 PetscScalar *array_b; 2373 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2374 PetscInt sb; 2375 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2376 2377 lgraph_type& level_graph = *lgraph_p; 2378 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2379 2380 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2381 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]); 2382 array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]); 2383 2384 typedef boost::iterator_property_map<array_ref_type::iterator, 2385 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2386 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)); 2387 gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2388 2389 ilu_set_solve(*lgraph_p, vector_b, vector_x); ierr = VecRestoreArray(b, &array_b);CHKERRQ(ierr); ierr = VecRestoreArray(x, &array_x);CHKERRQ(ierr); 2390 PetscFunctionReturn(0); 2391 } 2392 #endif 2393 2394 2395 #undef __FUNCT__ 2396 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced" 2397 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2398 { 2399 PetscMPIInt rank,size; 2400 MPI_Comm comm; 2401 PetscErrorCode ierr; 2402 PetscInt nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N; 2403 PetscMPIInt *send_rank= NULL,*recv_rank=NULL,subrank,subsize; 2404 PetscInt *rowrange =
mat->rmap->range; 2405 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2406 Mat A = aij->A,B=aij->B,C=*matredundant; 2407 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2408 PetscScalar *sbuf_a; 2409 PetscInt nzlocal=a->nz+b->nz; 2410 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2411 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray; 2412 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2413 MatScalar *aworkA,*aworkB; 2414 PetscScalar *vals; 2415 PetscMPIInt tag1,tag2,tag3,imdex; 2416 MPI_Request *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL; 2417 MPI_Request *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL; 2418 MPI_Status recv_status,*send_status; 2419 PetscInt *sbuf_nz=NULL,*rbuf_nz=NULL,count; 2420 PetscInt **rbuf_j=NULL; 2421 PetscScalar **rbuf_a=NULL; 2422 Mat_Redundant *redund =NULL; 2423 2424 PetscFunctionBegin; 2425 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2426 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2427 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2428 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2429 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2430 2431 if (reuse == MAT_REUSE_MATRIX) { 2432 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2433 if (subsize == 1) { 2434 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2435 redund = c->redundant; 2436 } else { 2437 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2438 redund = c->redundant; 2439 } 2440 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2441 2442 nsends = redund->nsends; 2443 nrecvs = redund->nrecvs; 2444 send_rank = redund->send_rank; 2445 recv_rank = redund->recv_rank; 2446 sbuf_nz = redund->sbuf_nz; 2447 rbuf_nz = redund->rbuf_nz; 2448 sbuf_j = redund->sbuf_j; 2449 sbuf_a = redund->sbuf_a; 2450 rbuf_j = redund->rbuf_j; 2451 rbuf_a = redund->rbuf_a; 2452 } 2453 2454 if (reuse == MAT_INITIAL_MATRIX) { 2455 PetscInt nleftover,np_subcomm; 2456 2457 /* get the destination processors' id send_rank, nsends and nrecvs */ 2458 ierr = PetscMalloc2(size,&send_rank,size,&recv_rank);CHKERRQ(ierr); 2459 2460 np_subcomm = size/nsubcomm; 2461 nleftover = size - nsubcomm*np_subcomm; 2462 2463 /* block of codes below is specific for INTERLACED */ 2464 /* ------------------------------------------------*/ 2465 nsends = 0; nrecvs = 0; 2466 for (i=0; i<size; i++) { 2467 if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */ 2468 send_rank[nsends++] = i; 2469 recv_rank[nrecvs++] = i; 2470 } 2471 } 2472 if (rank >= size - nleftover) { /* this proc is a leftover processor */ 2473 i = size-nleftover-1; 2474 j = 0; 2475 while (j < nsubcomm - nleftover) { 2476 send_rank[nsends++] = i; 2477 i--; j++; 2478 } 2479 } 2480 2481 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 2482 for (i=0; i<nleftover; i++) { 2483 recv_rank[nrecvs++] = size-nleftover+i; 2484 } 2485 } 2486 /*----------------------------------------------*/ 2487 2488 /* allocate sbuf_j, sbuf_a */ 2489 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2490 ierr = PetscMalloc1(i,&sbuf_j);CHKERRQ(ierr); 2491 ierr = PetscMalloc1((nzlocal+1),&sbuf_a);CHKERRQ(ierr); 2492 /* 2493 ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr); 2494 ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr); 2495 */ 
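/* A sketch of the send-buffer layout assembled below (with m = rend-rstart local rows): sbuf_j = [ rptr[0..m] | column indices of rows 0..m-1 ] and sbuf_a = [ matrix values in the same order as the column indices ]; rptr is a CSR-style row-pointer array (rptr[0] = 0, rptr[i+1] = rptr[i] + ncols), so a receiver can reconstruct every row from sbuf_j alone. */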
2496 } /* end of if (reuse == MAT_INITIAL_MATRIX) */ 2497 2498 /* copy mat's local entries into the buffers */ 2499 if (reuse == MAT_INITIAL_MATRIX) { 2500 rownz_max = 0; 2501 rptr = sbuf_j; 2502 cols = sbuf_j + rend-rstart + 1; 2503 vals = sbuf_a; 2504 rptr[0] = 0; 2505 for (i=0; i<rend-rstart; i++) { 2506 row = i + rstart; 2507 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2508 ncols = nzA + nzB; 2509 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2510 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2511 /* load the column indices for this row into cols */ 2512 lwrite = 0; 2513 for (l=0; l<nzB; l++) { 2514 if ((ctmp = bmap[cworkB[l]]) < cstart) { 2515 vals[lwrite] = aworkB[l]; 2516 cols[lwrite++] = ctmp; 2517 } 2518 } 2519 for (l=0; l<nzA; l++) { 2520 vals[lwrite] = aworkA[l]; 2521 cols[lwrite++] = cstart + cworkA[l]; 2522 } 2523 for (l=0; l<nzB; l++) { 2524 if ((ctmp = bmap[cworkB[l]]) >= cend) { 2525 vals[lwrite] = aworkB[l]; 2526 cols[lwrite++] = ctmp; 2527 } 2528 } 2529 vals += ncols; 2530 cols += ncols; 2531 rptr[i+1] = rptr[i] + ncols; 2532 if (rownz_max < ncols) rownz_max = ncols; 2533 } 2534 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%D] %D != %D + %D",rend-rstart,rptr[rend-rstart],a->nz,b->nz); 2535 } else { /* only copy matrix values into sbuf_a */ 2536 rptr = sbuf_j; 2537 vals = sbuf_a; 2538 rptr[0] = 0; 2539 for (i=0; i<rend-rstart; i++) { 2540 row = i + rstart; 2541 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2542 ncols = nzA + nzB; 2543 cworkB = b->j + b->i[i]; 2544 aworkA = a->a + a->i[i]; 2545 aworkB = b->a + b->i[i]; 2546 lwrite = 0; 2547 for (l=0; l<nzB; l++) { 2548 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2549 } 2550 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2551 for (l=0; l<nzB; l++) { 2552 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2553 } 2554 vals += ncols; 2555 rptr[i+1] = rptr[i] + ncols; 2556 } 2557 } /* end of if (reuse == MAT_INITIAL_MATRIX) */ 2558 2559 /* send nzlocal to others, and recv other's nzlocal */ 2560 /*--------------------------------------------------*/ 2561 if (reuse == MAT_INITIAL_MATRIX) { 2562 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr); 2563 2564 s_waits2 = s_waits3 + nsends; 2565 s_waits1 = s_waits2 + nsends; 2566 r_waits1 = s_waits1 + nsends; 2567 r_waits2 = r_waits1 + nrecvs; 2568 r_waits3 = r_waits2 + nrecvs; 2569 } else { 2570 ierr = PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr); 2571 2572 r_waits3 = s_waits3 + nsends; 2573 } 2574 2575 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2576 if (reuse == MAT_INITIAL_MATRIX) { 2577 /* get new tags to keep the communication clean */ 2578 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2579 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2580 ierr = PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);CHKERRQ(ierr); 2581 2582 /* post receives of other's nzlocal */ 2583 for (i=0; i<nrecvs; i++) { 2584 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2585 } 2586 /* send nzlocal to others */ 2587 for (i=0; i<nsends; i++) { 2588 sbuf_nz[i] = nzlocal; 2589 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2590 } 2591 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2592 count = nrecvs; 2593 while (count) { 2594 ierr =
MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2595 2596 recv_rank[imdex] = recv_status.MPI_SOURCE; 2597 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2598 ierr = PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);CHKERRQ(ierr); 2599 2600 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2601 2602 rbuf_nz[imdex] += i + 2; 2603 2604 ierr = PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);CHKERRQ(ierr); 2605 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2606 count--; 2607 } 2608 /* wait on sends of nzlocal */ 2609 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2610 /* send mat->i,j to others, and recv from other's */ 2611 /*------------------------------------------------*/ 2612 for (i=0; i<nsends; i++) { 2613 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2614 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2615 } 2616 /* wait on receives of mat->i,j */ 2617 /*------------------------------*/ 2618 count = nrecvs; 2619 while (count) { 2620 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2621 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2622 count--; 2623 } 2624 /* wait on sends of mat->i,j */ 2625 /*---------------------------*/ 2626 if (nsends) { 2627 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2628 } 2629 } /* end of if (reuse == MAT_INITIAL_MATRIX) */ 2630 2631 /* post receives, send and receive mat->a */ 2632 /*----------------------------------------*/ 2633 for (imdex=0; imdex<nrecvs; imdex++) { 2634 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2635 } 2636 for (i=0; i<nsends; i++) { 2637 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2638 } 2639 count = nrecvs; 2640 while (count) { 2641 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2642 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2643 count--; 2644 } 2645 if (nsends) { 2646 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2647 } 2648 2649 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2650 2651 /* create redundant matrix */ 2652 /*-------------------------*/ 2653 if (reuse == MAT_INITIAL_MATRIX) { 2654 const PetscInt *range; 2655 PetscInt rstart_sub,rend_sub,mloc_sub; 2656 2657 /* compute rownz_max for preallocation */ 2658 for (imdex=0; imdex<nrecvs; imdex++) { 2659 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2660 rptr = rbuf_j[imdex]; 2661 for (i=0; i<j; i++) { 2662 ncols = rptr[i+1] - rptr[i]; 2663 if (rownz_max < ncols) rownz_max = ncols; 2664 } 2665 } 2666 2667 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2668 2669 /* get local size of redundant matrix 2670 - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED, works for other types, but may not be efficient!
*/ 2671 ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr); 2672 rstart_sub = range[nsubcomm*subrank]; 2673 if (subrank+1 < subsize) { /* not the last proc in subcomm */ 2674 rend_sub = range[nsubcomm*(subrank+1)]; 2675 } else { 2676 rend_sub = mat->rmap->N; 2677 } 2678 mloc_sub = rend_sub - rstart_sub; 2679 2680 if (M == N) { 2681 ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2682 } else { /* non-square matrix */ 2683 ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr); 2684 } 2685 ierr = MatSetBlockSizesFromMats(C,mat,mat);CHKERRQ(ierr); 2686 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2687 ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr); 2688 ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr); 2689 } else { 2690 C = *matredundant; 2691 } 2692 2693 /* insert local matrix entries */ 2694 rptr = sbuf_j; 2695 cols = sbuf_j + rend-rstart + 1; 2696 vals = sbuf_a; 2697 for (i=0; i<rend-rstart; i++) { 2698 row = i + rstart; 2699 ncols = rptr[i+1] - rptr[i]; 2700 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2701 vals += ncols; 2702 cols += ncols; 2703 } 2704 /* insert received matrix entries */ 2705 for (imdex=0; imdex<nrecvs; imdex++) { 2706 rstart = rowrange[recv_rank[imdex]]; 2707 rend = rowrange[recv_rank[imdex]+1]; 2708 /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */ 2709 rptr = rbuf_j[imdex]; 2710 cols = rbuf_j[imdex] + rend-rstart + 1; 2711 vals = rbuf_a[imdex]; 2712 for (i=0; i<rend-rstart; i++) { 2713 row = i + rstart; 2714 ncols = rptr[i+1] - rptr[i]; 2715 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2716 vals += ncols; 2717 cols += ncols; 2718 } 2719 } 2720 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2721 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2722 2723 if (reuse == MAT_INITIAL_MATRIX) { 2724 *matredundant = C; 2725 2726 /* create a supporting struct and attach it to C for reuse */ 2727 ierr = PetscNewLog(C,&redund);CHKERRQ(ierr); 2728 if (subsize == 1) { 2729 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2730 c->redundant = redund; 2731 } else { 2732 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2733 c->redundant = redund; 2734 } 2735 2736 redund->nzlocal = nzlocal; 2737 redund->nsends = nsends; 2738 redund->nrecvs = nrecvs; 2739 redund->send_rank = send_rank; 2740 redund->recv_rank = recv_rank; 2741 redund->sbuf_nz = sbuf_nz; 2742 redund->rbuf_nz = rbuf_nz; 2743 redund->sbuf_j = sbuf_j; 2744 redund->sbuf_a = sbuf_a; 2745 redund->rbuf_j = rbuf_j; 2746 redund->rbuf_a = rbuf_a; 2747 redund->psubcomm = NULL; 2748 } 2749 PetscFunctionReturn(0); 2750 } 2751 2752 #undef __FUNCT__ 2753 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2754 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2755 { 2756 PetscErrorCode ierr; 2757 MPI_Comm comm; 2758 PetscMPIInt size,subsize; 2759 PetscInt mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N; 2760 Mat_Redundant *redund=NULL; 2761 PetscSubcomm psubcomm=NULL; 2762 MPI_Comm subcomm_in=subcomm; 2763 Mat *matseq; 2764 IS isrow,iscol; 2765 2766 PetscFunctionBegin; 2767 if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */ 2768 if (reuse == MAT_INITIAL_MATRIX) { 2769 /* create psubcomm, then get subcomm */ 2770 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2771 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2772 if (nsubcomm < 1 || nsubcomm 
> size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must be between 1 and %D",size); 2773 2774 ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr); 2775 ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr); 2776 ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr); 2777 ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr); 2778 subcomm = psubcomm->comm; 2779 } else { /* retrieve psubcomm and subcomm */ 2780 ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr); 2781 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2782 if (subsize == 1) { 2783 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2784 redund = c->redundant; 2785 } else { 2786 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2787 redund = c->redundant; 2788 } 2789 psubcomm = redund->psubcomm; 2790 } 2791 if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) { 2792 ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr); 2793 if (reuse == MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_Redundant() */ 2794 ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr); 2795 if (subsize == 1) { 2796 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2797 c->redundant->psubcomm = psubcomm; 2798 } else { 2799 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2800 c->redundant->psubcomm = psubcomm; 2801 } 2802 } 2803 PetscFunctionReturn(0); 2804 } 2805 } 2806 2807 /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */ 2808 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2809 if (reuse == MAT_INITIAL_MATRIX) { 2810 /* create a local sequential matrix matseq[0] */ 2811 mloc_sub = PETSC_DECIDE; 2812 ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr); 2813 ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr); 2814 rstart = rend - mloc_sub; 2815 ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr); 2816 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr); 2817 } else { /* reuse == MAT_REUSE_MATRIX */ 2818 if (subsize == 1) { 2819 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2820 redund = c->redundant; 2821 } else { 2822 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2823 redund = c->redundant; 2824 } 2825 2826 isrow = redund->isrow; 2827 iscol = redund->iscol; 2828 matseq = redund->matseq; 2829 } 2830 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr); 2831 ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr); 2832 2833 if (reuse == MAT_INITIAL_MATRIX) { 2834 /* create a supporting struct and attach it to C for reuse */ 2835 ierr = PetscNewLog(*matredundant,&redund);CHKERRQ(ierr); 2836 if (subsize == 1) { 2837 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2838 c->redundant = redund; 2839 } else { 2840 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2841 c->redundant = redund; 2842 } 2843 redund->isrow = isrow; 2844 redund->iscol = iscol; 2845 redund->matseq = matseq; 2846 redund->psubcomm = psubcomm; 2847 } 2848 PetscFunctionReturn(0); 2849 } 2850 2851 #undef __FUNCT__ 2852 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2853 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2854 { 2855 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2856 PetscErrorCode ierr; 2857 PetscInt i,*idxb = 0; 2858 PetscScalar *va,*vb; 2859 Vec vtmp; 2860 2861
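/* Row-wise maxima of |a_ij| are computed independently on the diagonal block a->A and the off-diagonal block a->B and then merged entry by entry; indices returned for the off-diagonal block are local to B and must be translated to global column numbers through a->garray. */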
  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) { /* loop over the local rows */
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMin_MPIAIJ"
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat    = (Mat_MPIAIJ*) A->data;
  PetscInt       n       = A->rmap->n;
  PetscInt       cstart  = A->cmap->rstart;
  PetscInt       *cmap   = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
  /* the work vectors are purely local, so they must live on PETSC_COMM_SELF */
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMax_MPIAIJ"
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat    = (Mat_MPIAIJ*) A->data;
  PetscInt       n       = A->rmap->n;
  PetscInt       cstart  = A->cmap->rstart;
  PetscInt       *cmap   = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat            *dummy;

  PetscFunctionBegin;
  ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
  *newmat = *dummy;
  ierr    = PetscFree(dummy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetRandom_MPIAIJ"
static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;

  PetscFunctionBegin;
  ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
  ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
#if defined(PETSC_HAVE_PBGL)
                                       MatSolve_MPIAIJ,
#else
                                       0,
#endif
                                       0,
                                       0,
                                /*10*/ 0,
                                       0,
                                       0,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                /*24*/ MatZeroRows_MPIAIJ,
                                       0,
#if defined(PETSC_HAVE_PBGL)
                                       0,
#else
                                       0,
#endif
                                       0,
                                       0,
                                /*29*/ MatSetUp_MPIAIJ,
#if defined(PETSC_HAVE_PBGL)
                                       0,
#else
                                       0,
#endif
                                       0,
                                       0,
                                       0,
                                /*34*/ MatDuplicate_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*39*/ MatAXPY_MPIAIJ,
                                       MatGetSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       0,
                                       0,
                                       MatZeroRowsColumns_MPIAIJ,
                                /*49*/ MatSetRandom_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       0,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       0,
                                /*59*/ MatGetSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       0,
                                       MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
                                /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       0,
                                       MatSetColoring_MPIAIJ,
                                       0,
                                       MatSetValuesAdifor_MPIAIJ,
                                /*75*/ MatFDColoringApply_AIJ,
                                       0,
                                       0,
                                       0,
                                       MatFindZeroDiagonals_MPIAIJ,
                                /*80*/ 0,
                                       0,
                                       0,
                                /*83*/ MatLoad_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
                                       MatMatMultSymbolic_MPIAIJ_MPIAIJ,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       MatPtAP_MPIAIJ_MPIAIJ,
                                       MatPtAPSymbolic_MPIAIJ_MPIAIJ,
                                /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*99*/ 0,
                                       0,
                                       0,
                                       MatConjugate_MPIAIJ,
                                       0,
                               /*104*/ MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       0,
                                       0,
                               /*109*/ 0,
                                       MatGetRedundantMatrix_MPIAIJ,
                                       MatGetRowMin_MPIAIJ,
                                       0,
                                       0,
                               /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*119*/ 0,
                                       0,
                                       0,
                                       0,
                                       MatGetMultiProcBlock_MPIAIJ,
                               /*124*/ MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnNorms_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       0,
                                       MatGetSubMatricesParallel_MPIAIJ,
                               /*129*/ 0,
                                       MatTransposeMatMult_MPIAIJ_MPIAIJ,
                                       MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       0,
                               /*134*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*139*/ 0,
                                       0,
                                       0,
                                       MatFDColoringSetUp_MPIXAIJ
};

/* ----------------------------------------------------------------------------------------*/

#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIAIJ"
PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIAIJ"
PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  b    = (Mat_MPIAIJ*)B->data;

  if (!B->preallocated) {
    /* Explicitly create 2 MATSEQAIJ matrices. */
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
    ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
    ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
    ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
  }

  ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
  B->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIAIJ"
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  a       = (Mat_MPIAIJ*)mat->data;

  mat->factortype   = matin->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  if (oldmat->garray) {
    PetscInt len;
    len  = oldmat->B->cmap->n;
    ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    if (len) {
      ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
    }
  } else a->garray = 0;

  ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
  ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIAIJ"
PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
{
  PetscScalar    *vals,*svals;
  MPI_Comm       comm;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
  PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
  PetscInt       cend,cstart,n,*rowners,sizesset=1;
  int            fd;
  PetscInt       bs = 1;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);

  if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M    = header[1]; N = header[2];
  /* If the global sizes are set to PETSC_DECIDE, set them to the sizes given in the file */
  if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
  if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;

  /* If the global sizes are set, check that they are consistent with those given in the file */
  if (sizesset) {
    ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
  }
  if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%d) and input matrix has (%d)",M,grows);
  if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%d) and input matrix has (%d)",N,gcols);

  /* determine ownership of all (block) rows */
  if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
  if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */
  else m = newMat->rmap->n; /* Set by user */
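  /* Gather each process's local row count, then turn the counts into a prefix sum so
     that rowners[rank] and rowners[rank+1] bound the rows owned by this process. */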
  ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

  /* First process needs enough room for process with most rows */
  if (!rank) {
    mmax = rowners[1];
    for (i=2; i<=size; i++) {
      mmax = PetscMax(mmax, rowners[i]);
    }
  } else mmax = -1; /* unused, but compilers complain */

  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
    ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
    for (j=0; j<m; j++) {
      procsnz[0] += ourlens[j];
    }
    for (i=1; i<size; i++) {
      ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
      /* calculate the number of nonzeros on each processor */
      for (j=0; j<rowners[i+1]-rowners[i]; j++) {
        procsnz[i] += rowlengths[j];
      }
      ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);
  } else {
    ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz   = procsnz[0];
    ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in everyone else's part and ship it off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);

    /* receive message of column indices */
    ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
    else n = newMat->cmap->n;
    ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off-diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  if (!sizesset) {
    ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
  }

  if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}

  ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);

  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }
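  /* Last pass over the file: process 0 reads the numerical values and ships each of
     the other processes its block; every process then inserts its own rows. */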
  if (!rank) {
    ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in the parts for all other processors and ship them out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr);

    /* receive message of values */
    ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  IS             iscol_local;
  PetscInt       csize;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
    if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    PetscInt cbs;
    ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
    ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
    ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
  }
  ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
/*
    Not great since it makes two copies of the submatrix: first a SeqAIJ on each
    process, and then the final result by concatenating the local pieces.
    Writing it directly would be much like MatGetSubMatrices_MPIAIJ().

    Note: This requires a sequential iscol with all indices.
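    (MatGetSubMatrix_MPIAIJ() above builds such an iscol with ISAllGather().)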
*/
PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
  PetscBool      allcolumns, colflag;
  Mat            M,Mreuse;
  MatScalar      *vwork,*aa;
  MPI_Comm       comm;
  Mat_SeqAIJ     *aij;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
  if (colflag && ncol == mat->cmap->N) {
    allcolumns = PETSC_TRUE;
  } else {
    allcolumns = PETSC_FALSE;
  }
  if (call == MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
  } else {
    ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);

    /* next, compute all the lengths */
    ierr  = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr);
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
       The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       m,cstart,cend,j,nnz,i,d;
  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
  const PetscInt *JJ;
  PetscScalar    *values;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);

  ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);

#if defined(PETSC_USE_DEBUG)
  for (i=0; i<m; i++) {
    nnz = Ii[i+1] - Ii[i];
    JJ  = J + Ii[i];
    if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
    if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
    if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
  }
#endif

  for (i=0; i<m; i++) {
    nnz     = Ii[i+1] - Ii[i];
    JJ      = J + Ii[i];
    nnz_max = PetscMax(nnz_max,nnz);
    d       = 0;
    for (j=0; j<nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  if (v) values = (PetscScalar*)v;
  else {
    ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr);
  }
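  /* Insert the rows one at a time.  When the caller passes v == NULL every row is
     inserted from the zeroed work array instead, so the nonzero pattern is still
     laid down even though no numerical values were supplied. */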
  for (i=0; i<m; i++) {
    ii   = i + rstart;
    nnz  = Ii[i+1] - Ii[i];
    ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (!v) {
    ierr = PetscFree(values);CHKERRQ(ierr);
  }
  ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
/*@
   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero)
-  v - optional values in the matrix

   Level: developer

   Notes:
       The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of v[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and the i indices are indices into the local j array.

       The format used for the sparse matrix input is equivalent to a
    row-major ordering, i.e., for the following matrix, the input data expected is
    as shown:

        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0]: rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1 = 2+1]
        j =  {0,0,2}  [size = nz = 3]
        v =  {1,2,3}  [size = nz = 3]

     Process1 [P1]: rows_owned=[2]
        i =  {0,3}    [size = nrow+1 = 1+1]
        j =  {0,1,2}  [size = nz = 3]
        v =  {4,5,6}  [size = nz = 3]
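     As an illustration only (not part of the library; the variable names are made up),
     rank 0 of the two-process example above could be set up roughly as follows, with
     error checking omitted:

.vb
     Mat         B;
     PetscInt    i[] = {0,1,3};        /* row starts for the 2 local rows */
     PetscInt    j[] = {0,0,2};        /* global column indices           */
     PetscScalar v[] = {1.0,2.0,3.0};  /* one value per entry of j        */

     MatCreate(PETSC_COMM_WORLD,&B);
     MatSetSizes(B,2,PETSC_DECIDE,3,3); /* this process owns rows 0 and 1 */
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve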
.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
          MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
           For matrices that will be factored, you must leave room for (and set)
           the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or NULL, if o_nz is used to specify the nonzero
           structure.  The size of this array is equal to the number
           of local rows, i.e., 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)), is fully compatible with standard Fortran 77
   storage.  The stored row and column indices begin with zero.
   See Users-Manual: ch_mat for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square.  The remaining portion of the local
   submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors.  Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows.  This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively.  The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices, e.g., proc1 will store [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc.  This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is perfect.
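   As an illustration only (the sizes are those of the example above, not part of
   the manual page proper), proc0 could preallocate its share of the 8x8 matrix
   roughly as follows, with error checking omitted:

.vb
     Mat      A;
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2}; /* proc0's three rows */

     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,3,3,8,8);   /* proc0 owns 3 rows and 3 "diagonal" columns */
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve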
   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatGetInfo(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithArrays"
/*@
   MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain
   the local rows in standard CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and the i indices are indices into the local j array.

       The format used for the sparse matrix input is equivalent to a
    row-major ordering, i.e., for the following matrix, the input data expected is
    as shown:

        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0]: rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1 = 2+1]
        j =  {0,0,2}  [size = nz = 3]
        v =  {1,2,3}  [size = nz = 3]

     Process1 [P1]: rows_owned=[2]
        i =  {0,3}    [size = nrow+1 = 1+1]
        j =  {0,1,2}  [size = nz = 3]
        v =  {4,5,6}  [size = nz = 3]
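     Again purely as an illustration (hypothetical variable names), rank 0 of a
     two-process run could create the matrix above with:

.vb
     Mat         A;
     PetscInt    i[] = {0,1,3};
     PetscInt    j[] = {0,0,2};
     PetscScalar a[] = {1.0,2.0,3.0};

     MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i,j,a,&A);
.ve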
.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateAIJ"
/*@C
   MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or NULL, if o_nz is used to specify the nonzero
           structure.  The size of this array is equal to the number
           of local rows, i.e., 'm'.

   Output Parameter:
.  A - the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2, etc., where
   m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
   values corresponding to an [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to 0th partition, the next n1 columns belonging to the next
   partition etc., where n0,n1,n2... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor, i.e., the diagonal matrix on
   process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
   etc.  The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion.  The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism:
     MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode  - Do not use inodes
.  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!


   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors.  Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows.
   This division can be shown as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively.  The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices, e.g., proc1 will store [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc.  This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is perfect.
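   As a sketch only (the sizes are taken from the example above), proc0 of a
   three-process run could create and preallocate the 8x8 matrix with:

.vb
     Mat      A;
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};

     MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve

   followed by the usual MatSetValues()/MatAssemblyBegin()/MatAssemblyEnd() calls.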
   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetSeqAIJ"
PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (Ad)     *Ad     = a->A;
  if (Ao)     *Ao     = a->B;
  if (colmap) *colmap = a->garray;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetColoring_MPIAIJ"
PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (coloring->ctype == IS_COLORING_GLOBAL) {
    ISColoringValue *allcolors,*colors;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
    ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap->n; i++) {
      colors[i] = allcolors[a->garray[i]];
    }
    ierr = PetscFree(allcolors);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
  } else if (coloring->ctype == IS_COLORING_GHOSTED) {
    ISColoringValue *colors;
    PetscInt        *larray;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap->n; i++) {
      larray[i] = i + A->cmap->rstart;
    }
    ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap->n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
  PetscInt       *indx;

  PetscFunctionBegin;
  /* This routine will ONLY return MPIAIJ type matrix */
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
  if (n == PETSC_DECIDE) {
    ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
  }
  /* Check sum(n) = N */
  ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
  if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);

  ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
  rstart -= m;

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
  }

  ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
  ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;
    ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4382 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4383 PetscFunctionReturn(0); 4384 } 4385 4386 #undef __FUNCT__ 4387 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4388 /*@ 4389 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4390 matrices from each processor 4391 4392 Collective on MPI_Comm 4393 4394 Input Parameters: 4395 + comm - the communicator the parallel matrix will live on 4396 . inmat - the input sequential matrix on each process 4397 . n - number of local columns (or PETSC_DECIDE) 4398 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4399 4400 Output Parameter: 4401 . outmat - the parallel matrix generated 4402 4403 Level: advanced 4404 4405 Notes: The number of columns of the matrix on EACH process MUST be the same. 4406 4407 @*/ 4408 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4409 { 4410 PetscErrorCode ierr; 4411 PetscMPIInt size; 4412 4413 PetscFunctionBegin; 4414 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4415 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4416 if (size == 1) { 4417 if (scall == MAT_INITIAL_MATRIX) { 4418 ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr); 4419 } else { 4420 ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4421 } 4422 } else { 4423 if (scall == MAT_INITIAL_MATRIX) { 4424 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4425 } 4426 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4427 } 4428 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4429 PetscFunctionReturn(0); 4430 } 4431 4432 #undef __FUNCT__ 4433 #define __FUNCT__ "MatFileSplit" 4434 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4435 { 4436 PetscErrorCode ierr; 4437 PetscMPIInt rank; 4438 PetscInt m,N,i,rstart,nnz; 4439 size_t len; 4440 const PetscInt *indx; 4441 PetscViewer out; 4442 char *name; 4443 Mat B; 4444 const PetscScalar *values; 4445 4446 PetscFunctionBegin; 4447 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4448 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4449 /* Should this be the type of the diagonal block of A? 
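(A note on the question above: for a plain MPIAIJ matrix the diagonal block is MATSEQAIJ, so the hard-coded MATSEQAIJ below matches it; a subtype in the diagonal block, such as MATSEQAIJCRL, would not be preserved here.) 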
*/ 4450 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4451 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4452 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4453 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4454 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4455 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4456 for (i=0; i<m; i++) { 4457 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4458 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4459 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4460 } 4461 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4462 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4463 4464 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4465 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4466 ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr); 4467 sprintf(name,"%s.%d",outfile,rank); 4468 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4469 ierr = PetscFree(name);CHKERRQ(ierr); 4470 ierr = MatView(B,out);CHKERRQ(ierr); 4471 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4472 ierr = MatDestroy(&B);CHKERRQ(ierr); 4473 PetscFunctionReturn(0); 4474 } 4475 4476 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4477 #undef __FUNCT__ 4478 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4479 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4480 { 4481 PetscErrorCode ierr; 4482 Mat_Merge_SeqsToMPI *merge; 4483 PetscContainer container; 4484 4485 PetscFunctionBegin; 4486 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4487 if (container) { 4488 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4489 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4490 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4491 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4492 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4493 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4494 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4495 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4496 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4497 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4498 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4499 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4500 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4501 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4502 ierr = PetscFree(merge);CHKERRQ(ierr); 4503 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4504 } 4505 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4506 PetscFunctionReturn(0); 4507 } 4508 4509 #include <../src/mat/utils/freespace.h> 4510 #include <petscbt.h> 4511 4512 #undef __FUNCT__ 4513 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4514 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4515 { 4516 PetscErrorCode ierr; 4517 MPI_Comm comm; 4518 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4519 PetscMPIInt size,rank,taga,*len_s; 4520 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4521 PetscInt proc,m; 4522 PetscInt **buf_ri,**buf_rj; 4523 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4524 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4525 MPI_Request *s_waits,*r_waits; 4526 MPI_Status *status; 4527 MatScalar *aa=a->a; 4528 MatScalar **abuf_r,*ba_i; 4529 Mat_Merge_SeqsToMPI *merge; 4530 PetscContainer container; 4531 4532 PetscFunctionBegin; 4533 ierr = 
PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4534 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4535 4536 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4537 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4538 4539 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4540 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4541 4542 bi = merge->bi; 4543 bj = merge->bj; 4544 buf_ri = merge->buf_ri; 4545 buf_rj = merge->buf_rj; 4546 4547 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4548 owners = merge->rowmap->range; 4549 len_s = merge->len_s; 4550 4551 /* send and recv matrix values */ 4552 /*-----------------------------*/ 4553 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4554 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4555 4556 ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr); 4557 for (proc=0,k=0; proc<size; proc++) { 4558 if (!len_s[proc]) continue; 4559 i = owners[proc]; 4560 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4561 k++; 4562 } 4563 4564 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4565 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4566 ierr = PetscFree(status);CHKERRQ(ierr); 4567 4568 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4569 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4570 4571 /* insert mat values of mpimat */ 4572 /*----------------------------*/ 4573 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4574 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4575 4576 for (k=0; k<merge->nrecv; k++) { 4577 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4578 nrows = *(buf_ri_k[k]); 4579 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4580 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4581 } 4582 4583 /* set values of ba */ 4584 m = merge->rowmap->n; 4585 for (i=0; i<m; i++) { 4586 arow = owners[rank] + i; 4587 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4588 bnzi = bi[i+1] - bi[i]; 4589 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4590 4591 /* add local non-zero vals of this proc's seqmat into ba */ 4592 anzi = ai[arow+1] - ai[arow]; 4593 aj = a->j + ai[arow]; 4594 aa = a->a + ai[arow]; 4595 nextaj = 0; 4596 for (j=0; nextaj<anzi; j++) { 4597 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4598 ba_i[j] += aa[nextaj++]; 4599 } 4600 } 4601 4602 /* add received vals into ba */ 4603 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4604 /* i-th row */ 4605 if (i == *nextrow[k]) { 4606 anzi = *(nextai[k]+1) - *nextai[k]; 4607 aj = buf_rj[k] + *(nextai[k]); 4608 aa = abuf_r[k] + *(nextai[k]); 4609 nextaj = 0; 4610 for (j=0; nextaj<anzi; j++) { 4611 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4612 ba_i[j] += aa[nextaj++]; 4613 } 4614 } 4615 nextrow[k]++; nextai[k]++; 4616 } 4617 } 4618 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4619 } 4620 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4621 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4622 4623 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4624 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4625 ierr = 
PetscFree(ba_i);CHKERRQ(ierr); 4626 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4627 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4628 PetscFunctionReturn(0); 4629 } 4630 4631 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4632 4633 #undef __FUNCT__ 4634 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4635 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4636 { 4637 PetscErrorCode ierr; 4638 Mat B_mpi; 4639 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4640 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4641 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4642 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4643 PetscInt len,proc,*dnz,*onz,bs,cbs; 4644 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4645 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4646 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4647 MPI_Status *status; 4648 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4649 PetscBT lnkbt; 4650 Mat_Merge_SeqsToMPI *merge; 4651 PetscContainer container; 4652 4653 PetscFunctionBegin; 4654 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4655 4656 /* make sure it is a PETSc comm */ 4657 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4658 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4659 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4660 4661 ierr = PetscNew(&merge);CHKERRQ(ierr); 4662 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4663 4664 /* determine row ownership */ 4665 /*---------------------------------------------------------*/ 4666 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4667 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4668 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4669 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4670 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4671 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4672 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4673 4674 m = merge->rowmap->n; 4675 owners = merge->rowmap->range; 4676 4677 /* determine the number of messages to send, their lengths */ 4678 /*---------------------------------------------------------*/ 4679 len_s = merge->len_s; 4680 4681 len = 0; /* length of buf_si[] */ 4682 merge->nsend = 0; 4683 for (proc=0; proc<size; proc++) { 4684 len_si[proc] = 0; 4685 if (proc == rank) { 4686 len_s[proc] = 0; 4687 } else { 4688 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4689 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4690 } 4691 if (len_s[proc]) { 4692 merge->nsend++; 4693 nrows = 0; 4694 for (i=owners[proc]; i<owners[proc+1]; i++) { 4695 if (ai[i+1] > ai[i]) nrows++; 4696 } 4697 len_si[proc] = 2*(nrows+1); 4698 len += len_si[proc]; 4699 } 4700 } 4701 4702 /* determine the number and length of messages to receive for ij-structure */ 4703 /*-------------------------------------------------------------------------*/ 4704 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4705 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4706 4707 /* post the Irecv of j-structure */ 4708 /*-------------------------------*/ 4709 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4710 ierr = 
PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4711 4712 /* post the Isend of j-structure */ 4713 /*--------------------------------*/ 4714 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4715 4716 for (proc=0, k=0; proc<size; proc++) { 4717 if (!len_s[proc]) continue; 4718 i = owners[proc]; 4719 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4720 k++; 4721 } 4722 4723 /* receives and sends of j-structure are complete */ 4724 /*------------------------------------------------*/ 4725 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4726 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4727 4728 /* send and recv i-structure */ 4729 /*---------------------------*/ 4730 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4731 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4732 4733 ierr = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr); 4734 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4735 for (proc=0,k=0; proc<size; proc++) { 4736 if (!len_s[proc]) continue; 4737 /* form outgoing message for i-structure: 4738 buf_si[0]: nrows to be sent 4739 [1:nrows]: row index (global) 4740 [nrows+1:2*nrows+1]: i-structure index 4741 */ 4742 /*-------------------------------------------*/ 4743 nrows = len_si[proc]/2 - 1; 4744 buf_si_i = buf_si + nrows+1; 4745 buf_si[0] = nrows; 4746 buf_si_i[0] = 0; 4747 nrows = 0; 4748 for (i=owners[proc]; i<owners[proc+1]; i++) { 4749 anzi = ai[i+1] - ai[i]; 4750 if (anzi) { 4751 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4752 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4753 nrows++; 4754 } 4755 } 4756 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4757 k++; 4758 buf_si += len_si[proc]; 4759 } 4760 4761 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4762 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4763 4764 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4765 for (i=0; i<merge->nrecv; i++) { 4766 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4767 } 4768 4769 ierr = PetscFree(len_si);CHKERRQ(ierr); 4770 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4771 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4772 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4773 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4774 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4775 ierr = PetscFree(status);CHKERRQ(ierr); 4776 4777 /* compute a local seq matrix in each processor */ 4778 /*----------------------------------------------*/ 4779 /* allocate bi array and free space for accumulating nonzero column info */ 4780 ierr = PetscMalloc1((m+1),&bi);CHKERRQ(ierr); 4781 bi[0] = 0; 4782 4783 /* create and initialize a linked list */ 4784 nlnk = N+1; 4785 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4786 4787 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4788 len = ai[owners[rank+1]] - ai[owners[rank]]; 4789 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4790 4791 current_space = free_space; 4792 4793 /* determine symbolic info for each local row */ 4794 ierr = 
PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4795 4796 for (k=0; k<merge->nrecv; k++) { 4797 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4798 nrows = *buf_ri_k[k]; 4799 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4800 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4801 } 4802 4803 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4804 len = 0; 4805 for (i=0; i<m; i++) { 4806 bnzi = 0; 4807 /* add local non-zero cols of this proc's seqmat into lnk */ 4808 arow = owners[rank] + i; 4809 anzi = ai[arow+1] - ai[arow]; 4810 aj = a->j + ai[arow]; 4811 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4812 bnzi += nlnk; 4813 /* add received col data into lnk */ 4814 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4815 if (i == *nextrow[k]) { /* i-th row */ 4816 anzi = *(nextai[k]+1) - *nextai[k]; 4817 aj = buf_rj[k] + *nextai[k]; 4818 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4819 bnzi += nlnk; 4820 nextrow[k]++; nextai[k]++; 4821 } 4822 } 4823 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4824 4825 /* if free space is not available, make more free space */ 4826 if (current_space->local_remaining<bnzi) { 4827 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr); 4828 nspacedouble++; 4829 } 4830 /* copy data into free space, then initialize lnk */ 4831 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4832 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4833 4834 current_space->array += bnzi; 4835 current_space->local_used += bnzi; 4836 current_space->local_remaining -= bnzi; 4837 4838 bi[i+1] = bi[i] + bnzi; 4839 } 4840 4841 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4842 4843 ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr); 4844 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4845 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4846 4847 /* create symbolic parallel matrix B_mpi */ 4848 /*---------------------------------------*/ 4849 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4850 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4851 if (n==PETSC_DECIDE) { 4852 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4853 } else { 4854 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4855 } 4856 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4857 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4858 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4859 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4860 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4861 4862 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4863 B_mpi->assembled = PETSC_FALSE; 4864 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4865 merge->bi = bi; 4866 merge->bj = bj; 4867 merge->buf_ri = buf_ri; 4868 merge->buf_rj = buf_rj; 4869 merge->coi = NULL; 4870 merge->coj = NULL; 4871 merge->owners_co = NULL; 4872 4873 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4874 4875 /* attach the supporting struct to B_mpi for reuse */ 4876 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4877 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4878 ierr = 
PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4879 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4880 *mpimat = B_mpi; 4881 4882 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4883 PetscFunctionReturn(0); 4884 } 4885 4886 #undef __FUNCT__ 4887 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4888 /*@C 4889 MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding the sequential 4890 matrices from each processor 4891 4892 Collective on MPI_Comm 4893 4894 Input Parameters: 4895 + comm - the communicator the parallel matrix will live on 4896 . seqmat - the input sequential matrix on each process 4897 . m - number of local rows (or PETSC_DECIDE) 4898 . n - number of local columns (or PETSC_DECIDE) 4899 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4900 4901 Output Parameter: 4902 . mpimat - the parallel matrix generated 4903 4904 Level: advanced 4905 4906 Notes: 4907 The dimensions of the sequential matrix on each process MUST be the same. 4908 The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be 4909 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4910 @*/ 4911 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4912 { 4913 PetscErrorCode ierr; 4914 PetscMPIInt size; 4915 4916 PetscFunctionBegin; 4917 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4918 if (size == 1) { 4919 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4920 if (scall == MAT_INITIAL_MATRIX) { 4921 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4922 } else { 4923 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4924 } 4925 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4926 PetscFunctionReturn(0); 4927 } 4928 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4929 if (scall == MAT_INITIAL_MATRIX) { 4930 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4931 } 4932 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4933 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4934 PetscFunctionReturn(0); 4935 } 4936 4937 #undef __FUNCT__ 4938 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4939 /*@ 4940 MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4941 mlocal rows and n columns, where mlocal is the local row count obtained with MatGetLocalSize() and n is the global column count obtained 4942 with MatGetSize() 4943 4944 Not Collective 4945 4946 Input Parameters: 4947 + A - the matrix 4948 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4949 4950 Output Parameter: 4951 . A_loc - the local sequential matrix generated 4952 4953 Level: developer 4954
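   Example usage (a minimal sketch; error handling abbreviated, and A is assumed to be an assembled MATMPIAIJ matrix):
.vb
   Mat A_loc;
   ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
   /* ... use A_loc as an ordinary sequential AIJ matrix ... */
   ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr); /* refresh the values after A changes */
   ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve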
4955 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed() 4956 4957 @*/ 4958 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4959 { 4960 PetscErrorCode ierr; 4961 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4962 Mat_SeqAIJ *mat,*a,*b; 4963 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4964 MatScalar *aa,*ba,*cam; 4965 PetscScalar *ca; 4966 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4967 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4968 PetscBool match; 4969 4970 PetscFunctionBegin; 4971 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4972 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4973 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4974 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4975 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4976 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4977 aa = a->a; ba = b->a; 4978 if (scall == MAT_INITIAL_MATRIX) { 4979 ierr = PetscMalloc1((1+am),&ci);CHKERRQ(ierr); 4980 ci[0] = 0; 4981 for (i=0; i<am; i++) { 4982 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4983 } 4984 ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr); 4985 ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr); 4986 k = 0; 4987 for (i=0; i<am; i++) { 4988 ncols_o = bi[i+1] - bi[i]; 4989 ncols_d = ai[i+1] - ai[i]; 4990 /* off-diagonal portion of A */ 4991 for (jo=0; jo<ncols_o; jo++) { 4992 col = cmap[*bj]; 4993 if (col >= cstart) break; 4994 cj[k] = col; bj++; 4995 ca[k++] = *ba++; 4996 } 4997 /* diagonal portion of A */ 4998 for (j=0; j<ncols_d; j++) { 4999 cj[k] = cstart + *aj++; 5000 ca[k++] = *aa++; 5001 } 5002 /* off-diagonal portion of A */ 5003 for (j=jo; j<ncols_o; j++) { 5004 cj[k] = cmap[*bj++]; 5005 ca[k++] = *ba++; 5006 } 5007 } 5008 /* put together the new matrix */ 5009 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5010 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5011 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5012 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5013 mat->free_a = PETSC_TRUE; 5014 mat->free_ij = PETSC_TRUE; 5015 mat->nonew = 0; 5016 } else if (scall == MAT_REUSE_MATRIX) { 5017 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5018 ci = mat->i; cj = mat->j; cam = mat->a; 5019 for (i=0; i<am; i++) { 5020 /* off-diagonal portion of A */ 5021 ncols_o = bi[i+1] - bi[i]; 5022 for (jo=0; jo<ncols_o; jo++) { 5023 col = cmap[*bj]; 5024 if (col >= cstart) break; 5025 *cam++ = *ba++; bj++; 5026 } 5027 /* diagonal portion of A */ 5028 ncols_d = ai[i+1] - ai[i]; 5029 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5030 /* off-diagonal portion of A */ 5031 for (j=jo; j<ncols_o; j++) { 5032 *cam++ = *ba++; bj++; 5033 } 5034 } 5035 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5036 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5037 PetscFunctionReturn(0); 5038 } 5039 5040 #undef __FUNCT__ 5041 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5042 /*@C 5043 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5044 5045 Not Collective 5046 5047 Input Parameters: 5048 + A - the matrix 5049 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5050 - row, col - index sets of rows and columns to extract (or NULL) 5051 5052 Output Parameter: 5053 . 
A_loc - the local sequential matrix generated 5054 5055 Level: developer 5056 5057 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5058 5059 @*/ 5060 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5061 { 5062 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5063 PetscErrorCode ierr; 5064 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5065 IS isrowa,iscola; 5066 Mat *aloc; 5067 PetscBool match; 5068 5069 PetscFunctionBegin; 5070 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5071 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5072 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5073 if (!row) { 5074 start = A->rmap->rstart; end = A->rmap->rend; 5075 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5076 } else { 5077 isrowa = *row; 5078 } 5079 if (!col) { 5080 start = A->cmap->rstart; 5081 cmap = a->garray; 5082 nzA = a->A->cmap->n; 5083 nzB = a->B->cmap->n; 5084 ierr = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr); 5085 ncols = 0; 5086 for (i=0; i<nzB; i++) { 5087 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5088 else break; 5089 } 5090 imark = i; 5091 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5092 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5093 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5094 } else { 5095 iscola = *col; 5096 } 5097 if (scall != MAT_INITIAL_MATRIX) { 5098 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5099 aloc[0] = *A_loc; 5100 } 5101 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5102 *A_loc = aloc[0]; 5103 ierr = PetscFree(aloc);CHKERRQ(ierr); 5104 if (!row) { 5105 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5106 } 5107 if (!col) { 5108 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5109 } 5110 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5111 PetscFunctionReturn(0); 5112 } 5113 5114 #undef __FUNCT__ 5115 #define __FUNCT__ "MatGetBrowsOfAcols" 5116 /*@C 5117 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A 5118 5119 Collective on Mat 5120 5121 Input Parameters: 5122 + A,B - the matrices in mpiaij format 5123 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5124 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5125 5126 Output Parameter: 5127 + rowb, colb - index sets of rows and columns of B to extract 5128 - B_seq - the sequential matrix generated 5129 5130 Level: developer 5131 5132 @*/ 5133 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5134 { 5135 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5136 PetscErrorCode ierr; 5137 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5138 IS isrowb,iscolb; 5139 Mat *bseq=NULL; 5140 5141 PetscFunctionBegin; 5142 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5143 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5144 } 5145 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5146 5147 if (scall == MAT_INITIAL_MATRIX) { 5148 start = A->cmap->rstart; 5149 cmap = a->garray; 5150 nzA = a->A->cmap->n; 5151 nzB = a->B->cmap->n; 5152 ierr = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr); 5153 ncols = 0; 5154 for (i=0; i<nzB; i++) { /* row < local row index */ 5155 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5156 else break; 5157 } 5158 imark = i; 5159 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5160 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5161 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5162 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5163 } else { 5164 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5165 isrowb = *rowb; iscolb = *colb; 5166 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5167 bseq[0] = *B_seq; 5168 } 5169 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5170 *B_seq = bseq[0]; 5171 ierr = PetscFree(bseq);CHKERRQ(ierr); 5172 if (!rowb) { 5173 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5174 } else { 5175 *rowb = isrowb; 5176 } 5177 if (!colb) { 5178 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5179 } else { 5180 *colb = iscolb; 5181 } 5182 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5183 PetscFunctionReturn(0); 5184 } 5185 5186 #undef __FUNCT__ 5187 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5188 /* 5189 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns 5190 of the OFF-DIAGONAL portion of local A 5191 5192 Collective on Mat 5193 5194 Input Parameters: 5195 + A,B - the matrices in mpiaij format 5196 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5197 5198 Output Parameter: 5199 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5200 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5201 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5202 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5203 5204 Level: developer 5205 5206 */ 5207 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5208 { 5209 VecScatter_MPI_General *gen_to,*gen_from; 5210 PetscErrorCode ierr; 5211 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5212 Mat_SeqAIJ *b_oth; 5213 VecScatter ctx =a->Mvctx; 5214 MPI_Comm comm; 5215 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5216 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5217 PetscScalar *rvalues,*svalues; 5218 MatScalar *b_otha,*bufa,*bufA; 5219 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5220 MPI_Request *rwaits = NULL,*swaits = NULL; 5221 MPI_Status *sstatus,rstatus; 5222 PetscMPIInt jj; 5223 PetscInt *cols,sbs,rbs; 5224 PetscScalar *vals; 5225 5226 PetscFunctionBegin; 5227 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5228 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5229 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5230 } 5231 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5232 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5233 5234 gen_to = (VecScatter_MPI_General*)ctx->todata; 5235 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5236 rvalues = gen_from->values; /* holds the length of receiving row */ 5237 svalues = gen_to->values; /* holds the length of sending row */ 5238 nrecvs = gen_from->n; 5239 nsends = gen_to->n; 5240 5241 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5242 srow = gen_to->indices; /* local row index to be sent */ 5243 sstarts = gen_to->starts; 5244 sprocs = gen_to->procs; 5245 sstatus = gen_to->sstatus; 5246 sbs = gen_to->bs; 5247 rstarts = gen_from->starts; 5248 rprocs = gen_from->procs; 5249 rbs = gen_from->bs; 5250 5251 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5252 if (scall == MAT_INITIAL_MATRIX) { 5253 /* i-array */ 5254 /*---------*/ 5255 /* post receives */ 5256 for (i=0; i<nrecvs; i++) { 5257 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5258 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5259 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5260 } 5261 5262 /* pack the outgoing message */ 5263 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5264 5265 sstartsj[0] = 0; 5266 rstartsj[0] = 0; 5267 len = 0; /* total length of j or a array to be sent */ 5268 k = 0; 5269 for (i=0; i<nsends; i++) { 5270 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5271 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5272 for (j=0; j<nrows; j++) { 5273 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5274 for (l=0; l<sbs; l++) { 5275 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5276 5277 rowlen[j*sbs+l] = ncols; 5278 5279 len += ncols; 5280 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5281 } 5282 k++; 5283 } 5284 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5285 5286 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5287 } 5288 /* recvs and 
sends of i-array are completed */ 5289 i = nrecvs; 5290 while (i--) { 5291 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5292 } 5293 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5294 5295 /* allocate buffers for sending j and a arrays */ 5296 ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr); 5297 ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr); 5298 5299 /* create i-array of B_oth */ 5300 ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr); 5301 5302 b_othi[0] = 0; 5303 len = 0; /* total length of j or a array to be received */ 5304 k = 0; 5305 for (i=0; i<nrecvs; i++) { 5306 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5307 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 5308 for (j=0; j<nrows; j++) { 5309 b_othi[k+1] = b_othi[k] + rowlen[j]; 5310 len += rowlen[j]; k++; 5311 } 5312 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5313 } 5314 5315 /* allocate space for j and a arrays of B_oth */ 5316 ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr); 5317 ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr); 5318 5319 /* j-array */ 5320 /*---------*/ 5321 /* post receives of j-array */ 5322 for (i=0; i<nrecvs; i++) { 5323 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5324 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5325 } 5326 5327 /* pack the outgoing message j-array */ 5328 k = 0; 5329 for (i=0; i<nsends; i++) { 5330 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5331 bufJ = bufj+sstartsj[i]; 5332 for (j=0; j<nrows; j++) { 5333 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5334 for (ll=0; ll<sbs; ll++) { 5335 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5336 for (l=0; l<ncols; l++) { 5337 *bufJ++ = cols[l]; 5338 } 5339 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5340 } 5341 } 5342 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5343 } 5344 5345 /* recvs and sends of j-array are completed */ 5346 i = nrecvs; 5347 while (i--) { 5348 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5349 } 5350 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5351 } else if (scall == MAT_REUSE_MATRIX) { 5352 sstartsj = *startsj_s; 5353 rstartsj = *startsj_r; 5354 bufa = *bufa_ptr; 5355 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5356 b_otha = b_oth->a; 5357 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5358 5359 /* a-array */ 5360 /*---------*/ 5361 /* post receives of a-array */ 5362 for (i=0; i<nrecvs; i++) { 5363 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5364 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5365 } 5366 5367 /* pack the outgoing message a-array */ 5368 k = 0; 5369 for (i=0; i<nsends; i++) { 5370 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5371 bufA = bufa+sstartsj[i]; 5372 for (j=0; j<nrows; j++) { 5373 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5374 for (ll=0; ll<sbs; ll++) { 5375 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5376 for (l=0; l<ncols; l++) { 5377 *bufA++ = vals[l]; 5378 } 5379 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5380 } 5381 } 5382 ierr = 
MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5383 } 5384 /* recvs and sends of a-array are completed */ 5385 i = nrecvs; 5386 while (i--) { 5387 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5388 } 5389 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5390 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5391 5392 if (scall == MAT_INITIAL_MATRIX) { 5393 /* put together the new matrix */ 5394 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5395 5396 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5397 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5398 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5399 b_oth->free_a = PETSC_TRUE; 5400 b_oth->free_ij = PETSC_TRUE; 5401 b_oth->nonew = 0; 5402 5403 ierr = PetscFree(bufj);CHKERRQ(ierr); 5404 if (!startsj_s || !bufa_ptr) { 5405 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5406 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5407 } else { 5408 *startsj_s = sstartsj; 5409 *startsj_r = rstartsj; 5410 *bufa_ptr = bufa; 5411 } 5412 } 5413 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5414 PetscFunctionReturn(0); 5415 } 5416 5417 #undef __FUNCT__ 5418 #define __FUNCT__ "MatGetCommunicationStructs" 5419 /*@C 5420 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5421 5422 Not Collective 5423 5424 Input Parameters: 5425 . A - The matrix in mpiaij format 5426 5427 Output Parameter: 5428 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5429 . colmap - A map from global column index to local index into lvec 5430 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5431 5432 Level: developer 5433 5434 @*/ 5435 #if defined(PETSC_USE_CTABLE) 5436 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5437 #else 5438 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5439 #endif 5440 { 5441 Mat_MPIAIJ *a; 5442 5443 PetscFunctionBegin; 5444 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5445 PetscValidPointer(lvec, 2); 5446 PetscValidPointer(colmap, 3); 5447 PetscValidPointer(multScatter, 4); 5448 a = (Mat_MPIAIJ*) A->data; 5449 if (lvec) *lvec = a->lvec; 5450 if (colmap) *colmap = a->colmap; 5451 if (multScatter) *multScatter = a->Mvctx; 5452 PetscFunctionReturn(0); 5453 } 5454 5455 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5456 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5457 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5458 5459 #undef __FUNCT__ 5460 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5461 /* 5462 Computes (B'*A')' since computing B*A directly is untenable 5463 5464 n p p 5465 ( ) ( ) ( ) 5466 m ( A ) * n ( B ) = m ( C ) 5467 ( ) ( ) ( ) 5468 5469 */ 5470 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5471 { 5472 PetscErrorCode ierr; 5473 Mat At,Bt,Ct; 5474 5475 PetscFunctionBegin; 5476 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5477 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5478 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5479 ierr = MatDestroy(&At);CHKERRQ(ierr); 
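/* Ct = Bt*At = (A*B)^T; transposing it back into the caller-supplied C, which was created with the correct parallel layout by MatMatMultSymbolic_MPIDense_MPIAIJ(), yields C = A*B */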
5480 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5481 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5482 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5483 PetscFunctionReturn(0); 5484 } 5485 5486 #undef __FUNCT__ 5487 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5488 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5489 { 5490 PetscErrorCode ierr; 5491 PetscInt m=A->rmap->n,n=B->cmap->n; 5492 Mat Cmat; 5493 5494 PetscFunctionBegin; 5495 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5496 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5497 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5498 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5499 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5500 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5501 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5502 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5503 5504 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5505 5506 *C = Cmat; 5507 PetscFunctionReturn(0); 5508 } 5509 5510 /* ----------------------------------------------------------------*/ 5511 #undef __FUNCT__ 5512 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5513 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5514 { 5515 PetscErrorCode ierr; 5516 5517 PetscFunctionBegin; 5518 if (scall == MAT_INITIAL_MATRIX) { 5519 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5520 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5521 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5522 } 5523 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5524 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5525 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5526 PetscFunctionReturn(0); 5527 } 5528 5529 #if defined(PETSC_HAVE_MUMPS) 5530 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5531 #endif 5532 #if defined(PETSC_HAVE_PASTIX) 5533 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5534 #endif 5535 #if defined(PETSC_HAVE_SUPERLU_DIST) 5536 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5537 #endif 5538 #if defined(PETSC_HAVE_CLIQUE) 5539 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5540 #endif 5541 5542 /*MC 5543 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5544 5545 Options Database Keys: 5546 . 
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5547 5548 Level: beginner 5549 5550 .seealso: MatCreateAIJ() 5551 M*/ 5552 5553 #undef __FUNCT__ 5554 #define __FUNCT__ "MatCreate_MPIAIJ" 5555 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5556 { 5557 Mat_MPIAIJ *b; 5558 PetscErrorCode ierr; 5559 PetscMPIInt size; 5560 5561 PetscFunctionBegin; 5562 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5563 5564 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5565 B->data = (void*)b; 5566 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5567 B->assembled = PETSC_FALSE; 5568 B->insertmode = NOT_SET_VALUES; 5569 b->size = size; 5570 5571 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5572 5573 /* build cache for off array entries formed */ 5574 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5575 5576 b->donotstash = PETSC_FALSE; 5577 b->colmap = 0; 5578 b->garray = 0; 5579 b->roworiented = PETSC_TRUE; 5580 5581 /* stuff used for matrix vector multiply */ 5582 b->lvec = NULL; 5583 b->Mvctx = NULL; 5584 5585 /* stuff for MatGetRow() */ 5586 b->rowindices = 0; 5587 b->rowvalues = 0; 5588 b->getrowactive = PETSC_FALSE; 5589 5590 /* flexible pointer used in CUSP/CUSPARSE classes */ 5591 b->spptr = NULL; 5592 5593 #if defined(PETSC_HAVE_MUMPS) 5594 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr); 5595 #endif 5596 #if defined(PETSC_HAVE_PASTIX) 5597 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5598 #endif 5599 #if defined(PETSC_HAVE_SUPERLU_DIST) 5600 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5601 #endif 5602 #if defined(PETSC_HAVE_CLIQUE) 5603 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr); 5604 #endif 5605 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5606 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5607 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5608 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5609 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5610 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5611 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5612 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5613 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5614 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5615 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5616 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5617 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5618 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5619 PetscFunctionReturn(0); 5620 } 5621 5622 #undef __FUNCT__ 5623 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5624 /*@ 5625 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5626 and "off-diagonal" part of the matrix in CSR format. 5627 5628 Collective on MPI_Comm 5629 5630 Input Parameters: 5631 + comm - MPI communicator 5632 . m - number of local rows (Cannot be PETSC_DECIDE) 5633 . n - This value should be the same as the local size used in creating the 5634 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5635 calculated if N is given) For square matrices n is almost always m. 5636 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5637 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5638 . i - row indices for "diagonal" portion of matrix 5639 . j - column indices 5640 . a - matrix values 5641 . oi - row indices for "off-diagonal" portion of matrix 5642 . oj - column indices 5643 - oa - matrix values 5644 5645 Output Parameter: 5646 . mat - the matrix 5647 5648 Level: advanced 5649 5650 Notes: 5651 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5652 must free the arrays once the matrix has been destroyed and not before. 5653 5654 The i and j indices are 0 based 5655 5656 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5657 5658 This sets local rows and cannot be used to set off-processor values. 5659 5660 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5661 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5662 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5663 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5664 keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5665 communication if it is known that only local entries will be set. 
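   Example usage: for the 4 x 4 matrix
.vb
        1  2  |  0  3
        4  5  |  6  0
       ---------------
        0  7  |  8  9
       10  0  | 11 12
.ve
   distributed over two processes with two rows and two columns each, rank 0 would pass the arrays below (a minimal sketch with illustrative values; rank 1 is analogous, the arrays must remain valid until the matrix is destroyed, and, as in MatCreateAIJ(), the j indices of the diagonal block are local to this process while the oj indices are global):
.vb
   PetscInt    i[]  = {0,2,4},  j[]  = {0,1,0,1};
   PetscScalar a[]  = {1,2,4,5};
   PetscInt    oi[] = {0,1,2},  oj[] = {3,2};
   PetscScalar oa[] = {3,6};
   Mat         A;
   ierr = MatCreateMPIAIJWithSplitArrays(comm,2,2,4,4,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve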
5666 5667 .keywords: matrix, aij, compressed row, sparse, parallel 5668 5669 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5670 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5671 @*/ 5672 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5673 { 5674 PetscErrorCode ierr; 5675 Mat_MPIAIJ *maij; 5676 5677 PetscFunctionBegin; 5678 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5679 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5680 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5681 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5682 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5683 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5684 maij = (Mat_MPIAIJ*) (*mat)->data; 5685 5686 (*mat)->preallocated = PETSC_TRUE; 5687 5688 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5689 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5690 5691 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5692 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5693 5694 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5695 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5696 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5697 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5698 5699 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5700 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5701 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5702 PetscFunctionReturn(0); 5703 } 5704 5705 /* 5706 Special version for direct calls from Fortran 5707 */ 5708 #include <petsc-private/fortranimpl.h> 5709 5710 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5711 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5712 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5713 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5714 #endif 5715 5716 /* Change these macros so they can be used in a void function */ 5717 #undef CHKERRQ 5718 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5719 #undef SETERRQ2 5720 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5721 #undef SETERRQ3 5722 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5723 #undef SETERRQ 5724 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5725 5726 #undef __FUNCT__ 5727 #define __FUNCT__ "matsetvaluesmpiaij_" 5728 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5729 { 5730 Mat mat = *mmat; 5731 PetscInt m = *mm, n = *mn; 5732 InsertMode addv = *maddv; 5733 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5734 PetscScalar value; 5735 PetscErrorCode ierr; 5736 5737 MatCheckPreallocated(mat,1); 5738 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5739 5740 #if defined(PETSC_USE_DEBUG) 5741 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5742 #endif 5743 { 5744 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5745 
PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5746 PetscBool roworiented = aij->roworiented; 5747 5748 /* Some Variables required in the macro */ 5749 Mat A = aij->A; 5750 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5751 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5752 MatScalar *aa = a->a; 5753 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5754 Mat B = aij->B; 5755 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5756 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5757 MatScalar *ba = b->a; 5758 5759 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5760 PetscInt nonew = a->nonew; 5761 MatScalar *ap1,*ap2; 5762 5763 PetscFunctionBegin; 5764 for (i=0; i<m; i++) { 5765 if (im[i] < 0) continue; 5766 #if defined(PETSC_USE_DEBUG) 5767 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5768 #endif 5769 if (im[i] >= rstart && im[i] < rend) { 5770 row = im[i] - rstart; 5771 lastcol1 = -1; 5772 rp1 = aj + ai[row]; 5773 ap1 = aa + ai[row]; 5774 rmax1 = aimax[row]; 5775 nrow1 = ailen[row]; 5776 low1 = 0; 5777 high1 = nrow1; 5778 lastcol2 = -1; 5779 rp2 = bj + bi[row]; 5780 ap2 = ba + bi[row]; 5781 rmax2 = bimax[row]; 5782 nrow2 = bilen[row]; 5783 low2 = 0; 5784 high2 = nrow2; 5785 5786 for (j=0; j<n; j++) { 5787 if (roworiented) value = v[i*n+j]; 5788 else value = v[i+j*m]; 5789 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5790 if (in[j] >= cstart && in[j] < cend) { 5791 col = in[j] - cstart; 5792 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5793 } else if (in[j] < 0) continue; 5794 #if defined(PETSC_USE_DEBUG) 5795 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5796 #endif 5797 else { 5798 if (mat->was_assembled) { 5799 if (!aij->colmap) { 5800 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5801 } 5802 #if defined(PETSC_USE_CTABLE) 5803 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5804 col--; 5805 #else 5806 col = aij->colmap[in[j]] - 1; 5807 #endif 5808 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5809 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5810 col = in[j]; 5811 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5812 B = aij->B; 5813 b = (Mat_SeqAIJ*)B->data; 5814 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5815 rp2 = bj + bi[row]; 5816 ap2 = ba + bi[row]; 5817 rmax2 = bimax[row]; 5818 nrow2 = bilen[row]; 5819 low2 = 0; 5820 high2 = nrow2; 5821 bm = aij->B->rmap->n; 5822 ba = b->a; 5823 } 5824 } else col = in[j]; 5825 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5826 } 5827 } 5828 } else if (!aij->donotstash) { 5829 if (roworiented) { 5830 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5831 } else { 5832 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5833 } 5834 } 5835 } 5836 } 5837 PetscFunctionReturnVoid(); 5838 } 5839 5840
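/*
   A sketch of what the direct binding above amounts to for a hypothetical C caller
   (the real callers are generated by the Fortran interface; note that every scalar
   argument is passed by reference, following the Fortran calling convention):

     Mat            A;                            // an assembled, preallocated MATMPIAIJ matrix
     PetscInt       m = 1, n = 2;
     PetscInt       rows[1] = {0}, cols[2] = {0,1};
     PetscScalar    vals[2] = {1.0, 2.0};
     InsertMode     mode    = ADD_VALUES;
     PetscErrorCode ierr;

     matsetvaluesmpiaij_(&A,&m,rows,&n,cols,vals,&mode,&ierr);

   which has the same semantics as MatSetValues(A,1,rows,2,cols,vals,ADD_VALUES).
*/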