1 2 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 3 #include <petsc-private/vecimpl.h> 4 #include <petscblaslapack.h> 5 #include <petscsf.h> 6 7 /*MC 8 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 9 10 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 11 and MATMPIAIJ otherwise. As a result, for single process communicators, 12 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 13 for communicators controlling multiple processes. It is recommended that you call both of 14 the above preallocation routines for simplicity. 15 16 Options Database Keys: 17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 18 19 Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when 20 enough exist. 21 22 Level: beginner 23 24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ 25 M*/ 26 27 /*MC 28 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 29 30 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 31 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 32 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 33 for communicators controlling multiple processes. It is recommended that you call both of 34 the above preallocation routines for simplicity. 35 36 Options Database Keys: 37 . 
-mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 38 39 Level: beginner 40 41 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 42 M*/ 43 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ" 46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 47 { 48 PetscErrorCode ierr; 49 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 50 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 51 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 52 const PetscInt *ia,*ib; 53 const MatScalar *aa,*bb; 54 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 55 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 56 57 PetscFunctionBegin; 58 *keptrows = 0; 59 ia = a->i; 60 ib = b->i; 61 for (i=0; i<m; i++) { 62 na = ia[i+1] - ia[i]; 63 nb = ib[i+1] - ib[i]; 64 if (!na && !nb) { 65 cnt++; 66 goto ok1; 67 } 68 aa = a->a + ia[i]; 69 for (j=0; j<na; j++) { 70 if (aa[j] != 0.0) goto ok1; 71 } 72 bb = b->a + ib[i]; 73 for (j=0; j <nb; j++) { 74 if (bb[j] != 0.0) goto ok1; 75 } 76 cnt++; 77 ok1:; 78 } 79 ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 80 if (!n0rows) PetscFunctionReturn(0); 81 ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr); 82 cnt = 0; 83 for (i=0; i<m; i++) { 84 na = ia[i+1] - ia[i]; 85 nb = ib[i+1] - ib[i]; 86 if (!na && !nb) continue; 87 aa = a->a + ia[i]; 88 for (j=0; j<na;j++) { 89 if (aa[j] != 0.0) { 90 rows[cnt++] = rstart + i; 91 goto ok2; 92 } 93 } 94 bb = b->a + ib[i]; 95 for (j=0; j<nb; j++) { 96 if (bb[j] != 0.0) { 97 rows[cnt++] = rstart + i; 98 goto ok2; 99 } 100 } 101 ok2:; 102 } 103 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 104 PetscFunctionReturn(0); 105 } 106 107 #undef __FUNCT__ 108 #define __FUNCT__ "MatDiagonalSet_MPIAIJ" 109 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 110 { 111 PetscErrorCode ierr; 112 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 113 114 
PetscFunctionBegin; 115 if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) { 116 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 117 } else { 118 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 119 } 120 PetscFunctionReturn(0); 121 } 122 123 124 #undef __FUNCT__ 125 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ" 126 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 127 { 128 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 129 PetscErrorCode ierr; 130 PetscInt i,rstart,nrows,*rows; 131 132 PetscFunctionBegin; 133 *zrows = NULL; 134 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 135 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 136 for (i=0; i<nrows; i++) rows[i] += rstart; 137 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 138 PetscFunctionReturn(0); 139 } 140 141 #undef __FUNCT__ 142 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 143 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 144 { 145 PetscErrorCode ierr; 146 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 147 PetscInt i,n,*garray = aij->garray; 148 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 149 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 150 PetscReal *work; 151 152 PetscFunctionBegin; 153 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 154 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 155 if (type == NORM_2) { 156 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 157 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 158 } 159 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 160 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 161 } 162 } else if (type == NORM_1) { 163 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 164 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 165 } 166 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 167 work[garray[b_aij->j[i]]] += 
PetscAbsScalar(b_aij->a[i]); 168 } 169 } else if (type == NORM_INFINITY) { 170 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 171 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 172 } 173 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 174 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 175 } 176 177 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 178 if (type == NORM_INFINITY) { 179 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 180 } else { 181 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 182 } 183 ierr = PetscFree(work);CHKERRQ(ierr); 184 if (type == NORM_2) { 185 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 186 } 187 PetscFunctionReturn(0); 188 } 189 190 #undef __FUNCT__ 191 #define __FUNCT__ "MatDistribute_MPIAIJ" 192 /* 193 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 194 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 
195 196 Only for square matrices 197 198 Used by a preconditioner, hence PETSC_EXTERN 199 */ 200 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 201 { 202 PetscMPIInt rank,size; 203 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 204 PetscErrorCode ierr; 205 Mat mat; 206 Mat_SeqAIJ *gmata; 207 PetscMPIInt tag; 208 MPI_Status status; 209 PetscBool aij; 210 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 211 212 PetscFunctionBegin; 213 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 214 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 215 if (!rank) { 216 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 217 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 218 } 219 if (reuse == MAT_INITIAL_MATRIX) { 220 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 221 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 222 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 223 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 224 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 225 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 226 ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr); 227 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 228 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 229 230 rowners[0] = 0; 231 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 232 rstart = rowners[rank]; 233 rend = rowners[rank+1]; 234 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 235 if (!rank) { 236 gmata = (Mat_SeqAIJ*) gmat->data; 237 /* send row lengths to all processors */ 238 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 239 for (i=1; i<size; i++) { 240 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 241 } 
242 /* determine number diagonal and off-diagonal counts */ 243 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 244 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 245 jj = 0; 246 for (i=0; i<m; i++) { 247 for (j=0; j<dlens[i]; j++) { 248 if (gmata->j[jj] < rstart) ld[i]++; 249 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 250 jj++; 251 } 252 } 253 /* send column indices to other processes */ 254 for (i=1; i<size; i++) { 255 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 256 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 257 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 258 } 259 260 /* send numerical values to other processes */ 261 for (i=1; i<size; i++) { 262 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 263 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 264 } 265 gmataa = gmata->a; 266 gmataj = gmata->j; 267 268 } else { 269 /* receive row lengths */ 270 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 271 /* receive column indices */ 272 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 273 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 274 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 275 /* determine number diagonal and off-diagonal counts */ 276 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 277 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 278 jj = 0; 279 for (i=0; i<m; i++) { 280 for (j=0; j<dlens[i]; j++) { 281 if (gmataj[jj] < rstart) ld[i]++; 282 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 283 jj++; 284 } 285 } 286 /* receive numerical values */ 287 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 288 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 289 } 290 /* set preallocation */ 291 for (i=0; i<m; i++) { 292 dlens[i] -= olens[i]; 293 } 294 ierr = 
MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 295 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 296 297 for (i=0; i<m; i++) { 298 dlens[i] += olens[i]; 299 } 300 cnt = 0; 301 for (i=0; i<m; i++) { 302 row = rstart + i; 303 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 304 cnt += dlens[i]; 305 } 306 if (rank) { 307 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 308 } 309 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 310 ierr = PetscFree(rowners);CHKERRQ(ierr); 311 312 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 313 314 *inmat = mat; 315 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 316 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 317 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 318 mat = *inmat; 319 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 320 if (!rank) { 321 /* send numerical values to other processes */ 322 gmata = (Mat_SeqAIJ*) gmat->data; 323 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 324 gmataa = gmata->a; 325 for (i=1; i<size; i++) { 326 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 327 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 328 } 329 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 330 } else { 331 /* receive numerical values from process 0*/ 332 nz = Ad->nz + Ao->nz; 333 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 334 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 335 } 336 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 337 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 338 ad = Ad->a; 339 ao = Ao->a; 340 if (mat->rmap->n) { 341 i = 0; 342 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 343 nz = Ad->i[i+1] - Ad->i[i]; ierr = 
PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 344 } 345 for (i=1; i<mat->rmap->n; i++) { 346 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 347 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 348 } 349 i--; 350 if (mat->rmap->n) { 351 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 352 } 353 if (rank) { 354 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 355 } 356 } 357 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 358 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 359 PetscFunctionReturn(0); 360 } 361 362 /* 363 Local utility routine that creates a mapping from the global column 364 number to the local number in the off-diagonal part of the local 365 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 366 a slightly higher hash table cost; without it it is not scalable (each processor 367 has an order N integer array but is fast to acess. 
368 */ 369 #undef __FUNCT__ 370 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 371 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 372 { 373 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 374 PetscErrorCode ierr; 375 PetscInt n = aij->B->cmap->n,i; 376 377 PetscFunctionBegin; 378 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 379 #if defined(PETSC_USE_CTABLE) 380 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 381 for (i=0; i<n; i++) { 382 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 383 } 384 #else 385 ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr); 386 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 387 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 388 #endif 389 PetscFunctionReturn(0); 390 } 391 392 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 393 { \ 394 if (col <= lastcol1) low1 = 0; \ 395 else high1 = nrow1; \ 396 lastcol1 = col;\ 397 while (high1-low1 > 5) { \ 398 t = (low1+high1)/2; \ 399 if (rp1[t] > col) high1 = t; \ 400 else low1 = t; \ 401 } \ 402 for (_i=low1; _i<high1; _i++) { \ 403 if (rp1[_i] > col) break; \ 404 if (rp1[_i] == col) { \ 405 if (addv == ADD_VALUES) ap1[_i] += value; \ 406 else ap1[_i] = value; \ 407 goto a_noinsert; \ 408 } \ 409 } \ 410 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 411 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 412 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 413 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 414 N = nrow1++ - 1; a->nz++; high1++; \ 415 /* shift up all the later entries in this row */ \ 416 for (ii=N; ii>=_i; ii--) { \ 417 rp1[ii+1] = rp1[ii]; \ 418 ap1[ii+1] = ap1[ii]; \ 419 } \ 420 rp1[_i] = col; \ 
421 ap1[_i] = value; \ 422 A->nonzerostate++;\ 423 a_noinsert: ; \ 424 ailen[row] = nrow1; \ 425 } 426 427 428 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 429 { \ 430 if (col <= lastcol2) low2 = 0; \ 431 else high2 = nrow2; \ 432 lastcol2 = col; \ 433 while (high2-low2 > 5) { \ 434 t = (low2+high2)/2; \ 435 if (rp2[t] > col) high2 = t; \ 436 else low2 = t; \ 437 } \ 438 for (_i=low2; _i<high2; _i++) { \ 439 if (rp2[_i] > col) break; \ 440 if (rp2[_i] == col) { \ 441 if (addv == ADD_VALUES) ap2[_i] += value; \ 442 else ap2[_i] = value; \ 443 goto b_noinsert; \ 444 } \ 445 } \ 446 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 447 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 448 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 449 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 450 N = nrow2++ - 1; b->nz++; high2++; \ 451 /* shift up all the later entries in this row */ \ 452 for (ii=N; ii>=_i; ii--) { \ 453 rp2[ii+1] = rp2[ii]; \ 454 ap2[ii+1] = ap2[ii]; \ 455 } \ 456 rp2[_i] = col; \ 457 ap2[_i] = value; \ 458 B->nonzerostate++; \ 459 b_noinsert: ; \ 460 bilen[row] = nrow2; \ 461 } 462 463 #undef __FUNCT__ 464 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 465 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 466 { 467 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 468 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 469 PetscErrorCode ierr; 470 PetscInt l,*garray = mat->garray,diag; 471 472 PetscFunctionBegin; 473 /* code only works for square matrices A */ 474 475 /* find size of row to the left of the diagonal part */ 476 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 477 row = row - diag; 478 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 479 if (garray[b->j[b->i[row]+l]] > diag) break; 480 } 481 ierr = 
PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 482 483 /* diagonal part */ 484 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 485 486 /* right of diagonal part */ 487 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 488 PetscFunctionReturn(0); 489 } 490 491 #undef __FUNCT__ 492 #define __FUNCT__ "MatSetValues_MPIAIJ" 493 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 494 { 495 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 496 PetscScalar value; 497 PetscErrorCode ierr; 498 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 499 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 500 PetscBool roworiented = aij->roworiented; 501 502 /* Some Variables required in the macro */ 503 Mat A = aij->A; 504 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 505 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 506 MatScalar *aa = a->a; 507 PetscBool ignorezeroentries = a->ignorezeroentries; 508 Mat B = aij->B; 509 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 510 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 511 MatScalar *ba = b->a; 512 513 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 514 PetscInt nonew; 515 MatScalar *ap1,*ap2; 516 517 PetscFunctionBegin; 518 for (i=0; i<m; i++) { 519 if (im[i] < 0) continue; 520 #if defined(PETSC_USE_DEBUG) 521 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 522 #endif 523 if (im[i] >= rstart && im[i] < rend) { 524 row = im[i] - rstart; 525 lastcol1 = -1; 526 rp1 = aj + ai[row]; 527 ap1 = aa + ai[row]; 528 rmax1 = aimax[row]; 529 nrow1 = ailen[row]; 530 low1 = 0; 531 high1 = nrow1; 532 
lastcol2 = -1; 533 rp2 = bj + bi[row]; 534 ap2 = ba + bi[row]; 535 rmax2 = bimax[row]; 536 nrow2 = bilen[row]; 537 low2 = 0; 538 high2 = nrow2; 539 540 for (j=0; j<n; j++) { 541 if (roworiented) value = v[i*n+j]; 542 else value = v[i+j*m]; 543 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 544 if (in[j] >= cstart && in[j] < cend) { 545 col = in[j] - cstart; 546 nonew = a->nonew; 547 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 548 } else if (in[j] < 0) continue; 549 #if defined(PETSC_USE_DEBUG) 550 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 551 #endif 552 else { 553 if (mat->was_assembled) { 554 if (!aij->colmap) { 555 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 556 } 557 #if defined(PETSC_USE_CTABLE) 558 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 559 col--; 560 #else 561 col = aij->colmap[in[j]] - 1; 562 #endif 563 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 564 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 565 col = in[j]; 566 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 567 B = aij->B; 568 b = (Mat_SeqAIJ*)B->data; 569 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 570 rp2 = bj + bi[row]; 571 ap2 = ba + bi[row]; 572 rmax2 = bimax[row]; 573 nrow2 = bilen[row]; 574 low2 = 0; 575 high2 = nrow2; 576 bm = aij->B->rmap->n; 577 ba = b->a; 578 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 579 } else col = in[j]; 580 nonew = b->nonew; 581 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 582 } 583 } 584 } else { 585 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 586 if (!aij->donotstash) { 587 mat->assembled = PETSC_FALSE; 588 if 
(roworiented) { 589 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 590 } else { 591 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 592 } 593 } 594 } 595 } 596 PetscFunctionReturn(0); 597 } 598 599 #undef __FUNCT__ 600 #define __FUNCT__ "MatGetValues_MPIAIJ" 601 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 602 { 603 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 604 PetscErrorCode ierr; 605 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 606 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 607 608 PetscFunctionBegin; 609 for (i=0; i<m; i++) { 610 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 611 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 612 if (idxm[i] >= rstart && idxm[i] < rend) { 613 row = idxm[i] - rstart; 614 for (j=0; j<n; j++) { 615 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 616 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 617 if (idxn[j] >= cstart && idxn[j] < cend) { 618 col = idxn[j] - cstart; 619 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 620 } else { 621 if (!aij->colmap) { 622 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 623 } 624 #if defined(PETSC_USE_CTABLE) 625 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 626 col--; 627 #else 628 col = aij->colmap[idxn[j]] - 1; 629 #endif 630 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 631 else { 632 ierr = 
MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 633 } 634 } 635 } 636 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 637 } 638 PetscFunctionReturn(0); 639 } 640 641 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 642 643 #undef __FUNCT__ 644 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 645 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 646 { 647 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 648 PetscErrorCode ierr; 649 PetscInt nstash,reallocs; 650 InsertMode addv; 651 652 PetscFunctionBegin; 653 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 654 655 /* make sure all processors are either in INSERTMODE or ADDMODE */ 656 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 657 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 658 mat->insertmode = addv; /* in case this processor had no cache */ 659 660 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 661 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 662 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 663 PetscFunctionReturn(0); 664 } 665 666 #undef __FUNCT__ 667 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 668 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 669 { 670 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 671 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 672 PetscErrorCode ierr; 673 PetscMPIInt n; 674 PetscInt i,j,rstart,ncols,flg; 675 PetscInt *row,*col; 676 PetscBool other_disassembled; 677 PetscScalar *val; 678 InsertMode addv = mat->insertmode; 679 680 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 681 682 PetscFunctionBegin; 683 if (!aij->donotstash && 
!mat->nooffprocentries) { 684 while (1) { 685 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 686 if (!flg) break; 687 688 for (i=0; i<n; ) { 689 /* Now identify the consecutive vals belonging to the same row */ 690 for (j=i,rstart=row[j]; j<n; j++) { 691 if (row[j] != rstart) break; 692 } 693 if (j < n) ncols = j-i; 694 else ncols = n-i; 695 /* Now assemble all these values with a single function call */ 696 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 697 698 i = j; 699 } 700 } 701 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 702 } 703 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 704 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 705 706 /* determine if any processor has disassembled, if so we must 707 also disassemble ourselfs, in order that we may reassemble. */ 708 /* 709 if nonzero structure of submatrix B cannot change then we know that 710 no processor disassembled thus we can skip this stuff 711 */ 712 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 713 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 714 if (mat->was_assembled && !other_disassembled) { 715 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 716 } 717 } 718 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 719 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 720 } 721 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 722 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 723 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 724 725 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 726 727 aij->rowvalues = 0; 728 729 /* used by MatAXPY() */ 730 a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 731 a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 732 733 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 734 if (a->inode.size) 
mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 735 736 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 737 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 738 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 739 ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 740 } 741 PetscFunctionReturn(0); 742 } 743 744 #undef __FUNCT__ 745 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 746 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 747 { 748 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 749 PetscErrorCode ierr; 750 751 PetscFunctionBegin; 752 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 753 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 754 PetscFunctionReturn(0); 755 } 756 757 #undef __FUNCT__ 758 #define __FUNCT__ "MatZeroRows_MPIAIJ" 759 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 760 { 761 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 762 PetscInt *owners = A->rmap->range; 763 PetscInt n = A->rmap->n; 764 PetscMPIInt size = mat->size; 765 PetscSF sf; 766 PetscInt *lrows; 767 PetscSFNode *rrows; 768 PetscInt lastidx = -1, r, p = 0, len = 0; 769 PetscErrorCode ierr; 770 771 PetscFunctionBegin; 772 /* Create SF where leaves are input rows and roots are owned rows */ 773 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 774 for (r = 0; r < n; ++r) lrows[r] = -1; 775 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 776 for (r = 0; r < N; ++r) { 777 const PetscInt idx = rows[r]; 778 PetscBool found = PETSC_FALSE; 779 /* Trick for efficient searching for sorted rows */ 780 if (lastidx > idx) p = 0; 781 lastidx = idx; 782 for (; p < size; ++p) { 783 if (idx >= owners[p] && idx < owners[p+1]) { 784 rrows[r].rank = p; 785 rrows[r].index = rows[r] - owners[p]; 786 found = PETSC_TRUE; 787 break; 788 } 789 } 790 if (!found) 
SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx); 791 } 792 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 793 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 794 /* Collect flags for rows to be zeroed */ 795 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 796 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 797 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 798 /* Compress and put in row numbers */ 799 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 800 /* fix right hand side if needed */ 801 if (x && b) { 802 const PetscScalar *xx; 803 PetscScalar *bb; 804 805 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 806 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 807 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 808 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 809 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 810 } 811 /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/ 812 ierr = MatZeroRows(mat->B, len, lrows, 0.0, 0,0);CHKERRQ(ierr); 813 if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) { 814 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 815 } else if (diag != 0.0) { 816 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 817 if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 818 for (r = 0; r < len; ++r) { 819 const PetscInt row = lrows[r] + A->rmap->rstart; 820 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 821 } 822 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 823 ierr = MatAssemblyEnd(A, 
MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 824 } else { 825 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 826 } 827 ierr = PetscFree(lrows);CHKERRQ(ierr); 828 829 /* only change matrix nonzero state if pattern was allowed to be changed */ 830 if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) { 831 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 832 ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 833 } 834 PetscFunctionReturn(0); 835 } 836 837 #undef __FUNCT__ 838 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 839 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 840 { 841 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 842 PetscErrorCode ierr; 843 PetscMPIInt size = l->size,n = A->rmap->n,lastidx = -1; 844 PetscInt i,j,r,m,p = 0,len = 0; 845 PetscInt *lrows,*owners = A->rmap->range; 846 PetscSFNode *rrows; 847 PetscSF sf; 848 const PetscScalar *xx; 849 PetscScalar *bb,*mask; 850 Vec xmask,lmask; 851 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 852 const PetscInt *aj, *ii,*ridx; 853 PetscScalar *aa; 854 #if defined(PETSC_DEBUG) 855 PetscBool found = PETSC_FALSE; 856 #endif 857 858 PetscFunctionBegin; 859 /* Create SF where leaves are input rows and roots are owned rows */ 860 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 861 for (r = 0; r < n; ++r) lrows[r] = -1; 862 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 863 for (r = 0; r < N; ++r) { 864 const PetscInt idx = rows[r]; 865 PetscBool found = PETSC_FALSE; 866 /* Trick for efficient searching for sorted rows */ 867 if (lastidx > idx) p = 0; 868 lastidx = idx; 869 for (; p < size; ++p) { 870 if (idx >= owners[p] && idx < owners[p+1]) { 871 rrows[r].rank = p; 872 rrows[r].index = rows[r] - owners[p]; 873 found = PETSC_TRUE; 874 break; 875 } 876 } 877 if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", 
idx); 878 } 879 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 880 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 881 /* Collect flags for rows to be zeroed */ 882 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 883 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 884 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 885 /* Compress and put in row numbers */ 886 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 887 /* zero diagonal part of matrix */ 888 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 889 /* handle off diagonal part of matrix */ 890 ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr); 891 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 892 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 893 for (i=0; i<len; i++) bb[lrows[i]] = 1; 894 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 895 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 896 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 897 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 898 if (x) { 899 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 900 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 901 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 902 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 903 } 904 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 905 /* remove zeroed rows of off diagonal matrix */ 906 ii = aij->i; 907 for (i=0; i<len; i++) { 908 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 909 } 910 /* loop over all elements of off process part of matrix zeroing removed columns*/ 911 if (aij->compressedrow.use) { 912 m = aij->compressedrow.nrows; 913 ii = aij->compressedrow.i; 914 ridx = 
aij->compressedrow.rindex; 915 for (i=0; i<m; i++) { 916 n = ii[i+1] - ii[i]; 917 aj = aij->j + ii[i]; 918 aa = aij->a + ii[i]; 919 920 for (j=0; j<n; j++) { 921 if (PetscAbsScalar(mask[*aj])) { 922 if (b) bb[*ridx] -= *aa*xx[*aj]; 923 *aa = 0.0; 924 } 925 aa++; 926 aj++; 927 } 928 ridx++; 929 } 930 } else { /* do not use compressed row format */ 931 m = l->B->rmap->n; 932 for (i=0; i<m; i++) { 933 n = ii[i+1] - ii[i]; 934 aj = aij->j + ii[i]; 935 aa = aij->a + ii[i]; 936 for (j=0; j<n; j++) { 937 if (PetscAbsScalar(mask[*aj])) { 938 if (b) bb[i] -= *aa*xx[*aj]; 939 *aa = 0.0; 940 } 941 aa++; 942 aj++; 943 } 944 } 945 } 946 if (x) { 947 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 948 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 949 } 950 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 951 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 952 ierr = PetscFree(lrows);CHKERRQ(ierr); 953 954 /* only change matrix nonzero state if pattern was allowed to be changed */ 955 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 956 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 957 ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 958 } 959 PetscFunctionReturn(0); 960 } 961 962 #undef __FUNCT__ 963 #define __FUNCT__ "MatMult_MPIAIJ" 964 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 965 { 966 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 967 PetscErrorCode ierr; 968 PetscInt nt; 969 970 PetscFunctionBegin; 971 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 972 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 973 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 974 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 975 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 976 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 
977 PetscFunctionReturn(0); 978 } 979 980 #undef __FUNCT__ 981 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 982 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 983 { 984 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 985 PetscErrorCode ierr; 986 987 PetscFunctionBegin; 988 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 989 PetscFunctionReturn(0); 990 } 991 992 #undef __FUNCT__ 993 #define __FUNCT__ "MatMultAdd_MPIAIJ" 994 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 995 { 996 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 997 PetscErrorCode ierr; 998 999 PetscFunctionBegin; 1000 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1001 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1002 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1003 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1004 PetscFunctionReturn(0); 1005 } 1006 1007 #undef __FUNCT__ 1008 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1009 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1010 { 1011 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1012 PetscErrorCode ierr; 1013 PetscBool merged; 1014 1015 PetscFunctionBegin; 1016 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1017 /* do nondiagonal part */ 1018 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1019 if (!merged) { 1020 /* send it on its way */ 1021 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1022 /* do local part */ 1023 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1024 /* receive remote parts: note this assumes the values are not actually */ 1025 /* added in yy until the next line, */ 1026 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1027 } else { 1028 /* do local part */ 1029 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1030 /* send it on its way */ 1031 ierr = 
VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1032 /* values actually were received in the Begin() but we need to call this nop */ 1033 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1034 } 1035 PetscFunctionReturn(0); 1036 } 1037 1038 #undef __FUNCT__ 1039 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1040 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1041 { 1042 MPI_Comm comm; 1043 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1044 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1045 IS Me,Notme; 1046 PetscErrorCode ierr; 1047 PetscInt M,N,first,last,*notme,i; 1048 PetscMPIInt size; 1049 1050 PetscFunctionBegin; 1051 /* Easy test: symmetric diagonal block */ 1052 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1053 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1054 if (!*f) PetscFunctionReturn(0); 1055 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1056 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1057 if (size == 1) PetscFunctionReturn(0); 1058 1059 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. 
*/ 1060 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1061 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1062 ierr = PetscMalloc1((N-last+first),¬me);CHKERRQ(ierr); 1063 for (i=0; i<first; i++) notme[i] = i; 1064 for (i=last; i<M; i++) notme[i-last+first] = i; 1065 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1066 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1067 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1068 Aoff = Aoffs[0]; 1069 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1070 Boff = Boffs[0]; 1071 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1072 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1073 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1074 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1075 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1076 ierr = PetscFree(notme);CHKERRQ(ierr); 1077 PetscFunctionReturn(0); 1078 } 1079 1080 #undef __FUNCT__ 1081 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1082 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1083 { 1084 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1085 PetscErrorCode ierr; 1086 1087 PetscFunctionBegin; 1088 /* do nondiagonal part */ 1089 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1090 /* send it on its way */ 1091 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1092 /* do local part */ 1093 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1094 /* receive remote parts */ 1095 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1096 PetscFunctionReturn(0); 1097 } 1098 1099 /* 1100 This only works correctly for square matrices where the subblock A->A is the 1101 diagonal block 1102 */ 1103 #undef __FUNCT__ 1104 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1105 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat 
A,Vec v) 1106 { 1107 PetscErrorCode ierr; 1108 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1109 1110 PetscFunctionBegin; 1111 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1112 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1113 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1114 PetscFunctionReturn(0); 1115 } 1116 1117 #undef __FUNCT__ 1118 #define __FUNCT__ "MatScale_MPIAIJ" 1119 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1120 { 1121 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1122 PetscErrorCode ierr; 1123 1124 PetscFunctionBegin; 1125 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1126 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1127 PetscFunctionReturn(0); 1128 } 1129 1130 #undef __FUNCT__ 1131 #define __FUNCT__ "MatDestroy_Redundant" 1132 PetscErrorCode MatDestroy_Redundant(Mat_Redundant **redundant) 1133 { 1134 PetscErrorCode ierr; 1135 Mat_Redundant *redund = *redundant; 1136 PetscInt i; 1137 1138 PetscFunctionBegin; 1139 *redundant = NULL; 1140 if (redund){ 1141 if (redund->matseq) { /* via MatGetSubMatrices() */ 1142 ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr); 1143 ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr); 1144 ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr); 1145 ierr = PetscFree(redund->matseq);CHKERRQ(ierr); 1146 } else { 1147 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 1148 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 1149 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 1150 for (i=0; i<redund->nrecvs; i++) { 1151 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 1152 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 1153 } 1154 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 1155 } 1156 1157 if (redund->psubcomm) { 1158 ierr = 
PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr); 1159 } 1160 ierr = PetscFree(redund);CHKERRQ(ierr); 1161 } 1162 PetscFunctionReturn(0); 1163 } 1164 1165 #undef __FUNCT__ 1166 #define __FUNCT__ "MatDestroy_MPIAIJ" 1167 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1168 { 1169 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1170 PetscErrorCode ierr; 1171 1172 PetscFunctionBegin; 1173 #if defined(PETSC_USE_LOG) 1174 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1175 #endif 1176 ierr = MatDestroy_Redundant(&aij->redundant);CHKERRQ(ierr); 1177 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1178 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1179 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1180 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1181 #if defined(PETSC_USE_CTABLE) 1182 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1183 #else 1184 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1185 #endif 1186 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1187 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1188 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1189 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1190 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1191 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1192 1193 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1194 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1195 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1196 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 1197 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1198 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1199 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1200 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1201 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1202 PetscFunctionReturn(0); 1203 } 1204 1205 #undef __FUNCT__ 1206 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1207 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1208 { 1209 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1210 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1211 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1212 PetscErrorCode ierr; 1213 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1214 int fd; 1215 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1216 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1217 PetscScalar *column_values; 1218 PetscInt message_count,flowcontrolcount; 1219 FILE *file; 1220 1221 PetscFunctionBegin; 1222 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1223 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1224 nz = A->nz + B->nz; 1225 if (!rank) { 1226 header[0] = MAT_FILE_CLASSID; 1227 header[1] = mat->rmap->N; 1228 header[2] = mat->cmap->N; 1229 1230 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1231 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1232 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1233 /* get largest number of rows any processor has */ 1234 rlen = mat->rmap->n; 1235 range = mat->rmap->range; 1236 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1237 } else { 1238 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1239 rlen = mat->rmap->n; 1240 } 1241 1242 /* load up the local row counts */ 1243 ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr); 1244 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + 
B->i[i+1] - B->i[i]; 1245 1246 /* store the row lengths to the file */ 1247 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1248 if (!rank) { 1249 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1250 for (i=1; i<size; i++) { 1251 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1252 rlen = range[i+1] - range[i]; 1253 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1254 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1255 } 1256 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1257 } else { 1258 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1259 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1260 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1261 } 1262 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1263 1264 /* load up the local column indices */ 1265 nzmax = nz; /* th processor needs space a largest processor needs */ 1266 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1267 ierr = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr); 1268 cnt = 0; 1269 for (i=0; i<mat->rmap->n; i++) { 1270 for (j=B->i[i]; j<B->i[i+1]; j++) { 1271 if ((col = garray[B->j[j]]) > cstart) break; 1272 column_indices[cnt++] = col; 1273 } 1274 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1275 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1276 } 1277 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1278 1279 /* store the column indices to the file */ 1280 ierr = 
PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1281 if (!rank) { 1282 MPI_Status status; 1283 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1284 for (i=1; i<size; i++) { 1285 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1286 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1287 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1288 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1289 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1290 } 1291 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1292 } else { 1293 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1294 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1295 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1296 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1297 } 1298 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1299 1300 /* load up the local column values */ 1301 ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr); 1302 cnt = 0; 1303 for (i=0; i<mat->rmap->n; i++) { 1304 for (j=B->i[i]; j<B->i[i+1]; j++) { 1305 if (garray[B->j[j]] > cstart) break; 1306 column_values[cnt++] = B->a[j]; 1307 } 1308 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1309 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1310 } 1311 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1312 1313 /* store the column values to the file */ 1314 ierr = 
PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1315 if (!rank) { 1316 MPI_Status status; 1317 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1318 for (i=1; i<size; i++) { 1319 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1320 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1321 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1322 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1323 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1324 } 1325 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1326 } else { 1327 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1328 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1329 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1330 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1331 } 1332 ierr = PetscFree(column_values);CHKERRQ(ierr); 1333 1334 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1335 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1336 PetscFunctionReturn(0); 1337 } 1338 1339 #include <petscdraw.h> 1340 #undef __FUNCT__ 1341 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1342 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1343 { 1344 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1345 PetscErrorCode ierr; 1346 PetscMPIInt rank = aij->rank,size = aij->size; 1347 PetscBool isdraw,iascii,isbinary; 1348 PetscViewer sviewer; 1349 PetscViewerFormat format; 1350 1351 PetscFunctionBegin; 1352 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1353 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1354 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1355 if (iascii) { 1356 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1357 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1358 MatInfo info; 1359 PetscBool inodes; 1360 1361 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1362 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1363 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1364 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1365 if (!inodes) { 1366 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1367 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1368 } else { 1369 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1370 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1371 } 1372 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1373 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1374 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1375 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1376 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1377 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1378 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1379 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 
1380 PetscFunctionReturn(0); 1381 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1382 PetscInt inodecount,inodelimit,*inodes; 1383 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1384 if (inodes) { 1385 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1386 } else { 1387 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1388 } 1389 PetscFunctionReturn(0); 1390 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1391 PetscFunctionReturn(0); 1392 } 1393 } else if (isbinary) { 1394 if (size == 1) { 1395 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1396 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1397 } else { 1398 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1399 } 1400 PetscFunctionReturn(0); 1401 } else if (isdraw) { 1402 PetscDraw draw; 1403 PetscBool isnull; 1404 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1405 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1406 } 1407 1408 { 1409 /* assemble the entire matrix onto first processor. 
*/ 1410 Mat A; 1411 Mat_SeqAIJ *Aloc; 1412 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1413 MatScalar *a; 1414 1415 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1416 if (!rank) { 1417 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1418 } else { 1419 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1420 } 1421 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1422 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1423 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1424 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1425 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1426 1427 /* copy over the A part */ 1428 Aloc = (Mat_SeqAIJ*)aij->A->data; 1429 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1430 row = mat->rmap->rstart; 1431 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1432 for (i=0; i<m; i++) { 1433 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1434 row++; 1435 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1436 } 1437 aj = Aloc->j; 1438 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1439 1440 /* copy over the B part */ 1441 Aloc = (Mat_SeqAIJ*)aij->B->data; 1442 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1443 row = mat->rmap->rstart; 1444 ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr); 1445 ct = cols; 1446 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1447 for (i=0; i<m; i++) { 1448 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1449 row++; 1450 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1451 } 1452 ierr = PetscFree(ct);CHKERRQ(ierr); 1453 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1454 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1455 /* 1456 Everyone has to call to draw the matrix since the graphics waits are 1457 synchronized across all processors that share 
the PetscDraw object 1458 */ 1459 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1460 if (!rank) { 1461 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1462 } 1463 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1464 ierr = MatDestroy(&A);CHKERRQ(ierr); 1465 } 1466 PetscFunctionReturn(0); 1467 } 1468 1469 #undef __FUNCT__ 1470 #define __FUNCT__ "MatView_MPIAIJ" 1471 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1472 { 1473 PetscErrorCode ierr; 1474 PetscBool iascii,isdraw,issocket,isbinary; 1475 1476 PetscFunctionBegin; 1477 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1478 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1479 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1480 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1481 if (iascii || isdraw || isbinary || issocket) { 1482 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1483 } 1484 PetscFunctionReturn(0); 1485 } 1486 1487 #undef __FUNCT__ 1488 #define __FUNCT__ "MatSOR_MPIAIJ" 1489 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1490 { 1491 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1492 PetscErrorCode ierr; 1493 Vec bb1 = 0; 1494 PetscBool hasop; 1495 1496 PetscFunctionBegin; 1497 if (flag == SOR_APPLY_UPPER) { 1498 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1499 PetscFunctionReturn(0); 1500 } 1501 1502 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1503 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1504 } 1505 1506 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1507 if (flag & SOR_ZERO_INITIAL_GUESS) { 1508 ierr = 
(*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1509 its--; 1510 } 1511 1512 while (its--) { 1513 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1514 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1515 1516 /* update rhs: bb1 = bb - B*x */ 1517 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1518 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1519 1520 /* local sweep */ 1521 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1522 } 1523 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1524 if (flag & SOR_ZERO_INITIAL_GUESS) { 1525 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1526 its--; 1527 } 1528 while (its--) { 1529 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1530 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1531 1532 /* update rhs: bb1 = bb - B*x */ 1533 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1534 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1535 1536 /* local sweep */ 1537 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1538 } 1539 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1540 if (flag & SOR_ZERO_INITIAL_GUESS) { 1541 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1542 its--; 1543 } 1544 while (its--) { 1545 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1546 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1547 1548 /* update rhs: bb1 = bb - B*x */ 1549 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1550 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1551 1552 /* local sweep */ 1553 ierr = 
(*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1554 } 1555 } else if (flag & SOR_EISENSTAT) { 1556 Vec xx1; 1557 1558 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1559 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1560 1561 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1562 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1563 if (!mat->diag) { 1564 ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1565 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1566 } 1567 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1568 if (hasop) { 1569 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1570 } else { 1571 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1572 } 1573 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1574 1575 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1576 1577 /* local sweep */ 1578 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1579 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1580 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1581 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1582 1583 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1584 PetscFunctionReturn(0); 1585 } 1586 1587 #undef __FUNCT__ 1588 #define __FUNCT__ "MatPermute_MPIAIJ" 1589 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1590 { 1591 Mat aA,aB,Aperm; 1592 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1593 PetscScalar *aa,*ba; 1594 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1595 PetscSF rowsf,sf; 1596 IS parcolp = NULL; 1597 PetscBool done; 1598 PetscErrorCode ierr; 1599 1600 PetscFunctionBegin; 1601 ierr = 
MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1602 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1603 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1604 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1605 1606 /* Invert row permutation to find out where my rows should go */ 1607 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1608 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1609 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1610 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1611 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1612 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1613 1614 /* Invert column permutation to find out where my columns should go */ 1615 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1616 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1617 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1618 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1619 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1620 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1621 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1622 1623 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1624 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1625 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1626 1627 /* Find out where my gcols should go */ 1628 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1629 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1630 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1631 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1632 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1633 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1634 ierr = 
PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1635 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1636 1637 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1638 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1639 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1640 for (i=0; i<m; i++) { 1641 PetscInt row = rdest[i],rowner; 1642 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1643 for (j=ai[i]; j<ai[i+1]; j++) { 1644 PetscInt cowner,col = cdest[aj[j]]; 1645 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1646 if (rowner == cowner) dnnz[i]++; 1647 else onnz[i]++; 1648 } 1649 for (j=bi[i]; j<bi[i+1]; j++) { 1650 PetscInt cowner,col = gcdest[bj[j]]; 1651 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1652 if (rowner == cowner) dnnz[i]++; 1653 else onnz[i]++; 1654 } 1655 } 1656 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1657 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1658 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1659 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1660 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1661 1662 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1663 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1664 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1665 for (i=0; i<m; i++) { 1666 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1667 PetscInt j0,rowlen; 1668 rowlen = ai[i+1] - ai[i]; 1669 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1670 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1671 ierr = 
MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1672 } 1673 rowlen = bi[i+1] - bi[i]; 1674 for (j0=j=0; j<rowlen; j0=j) { 1675 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1676 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1677 } 1678 } 1679 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1680 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1681 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1682 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1683 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1684 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1685 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1686 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1687 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1688 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1689 *B = Aperm; 1690 PetscFunctionReturn(0); 1691 } 1692 1693 #undef __FUNCT__ 1694 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1695 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1696 { 1697 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1698 Mat A = mat->A,B = mat->B; 1699 PetscErrorCode ierr; 1700 PetscReal isend[5],irecv[5]; 1701 1702 PetscFunctionBegin; 1703 info->block_size = 1.0; 1704 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1705 1706 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1707 isend[3] = info->memory; isend[4] = info->mallocs; 1708 1709 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1710 1711 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1712 isend[3] += info->memory; isend[4] += info->mallocs; 1713 if (flag == MAT_LOCAL) { 1714 info->nz_used = isend[0]; 1715 info->nz_allocated = isend[1]; 1716 info->nz_unneeded = isend[2]; 1717 info->memory = 
isend[3]; 1718 info->mallocs = isend[4]; 1719 } else if (flag == MAT_GLOBAL_MAX) { 1720 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1721 1722 info->nz_used = irecv[0]; 1723 info->nz_allocated = irecv[1]; 1724 info->nz_unneeded = irecv[2]; 1725 info->memory = irecv[3]; 1726 info->mallocs = irecv[4]; 1727 } else if (flag == MAT_GLOBAL_SUM) { 1728 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1729 1730 info->nz_used = irecv[0]; 1731 info->nz_allocated = irecv[1]; 1732 info->nz_unneeded = irecv[2]; 1733 info->memory = irecv[3]; 1734 info->mallocs = irecv[4]; 1735 } 1736 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1737 info->fill_ratio_needed = 0; 1738 info->factor_mallocs = 0; 1739 PetscFunctionReturn(0); 1740 } 1741 1742 #undef __FUNCT__ 1743 #define __FUNCT__ "MatSetOption_MPIAIJ" 1744 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1745 { 1746 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1747 PetscErrorCode ierr; 1748 1749 PetscFunctionBegin; 1750 switch (op) { 1751 case MAT_NEW_NONZERO_LOCATIONS: 1752 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1753 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1754 case MAT_KEEP_NONZERO_PATTERN: 1755 case MAT_NEW_NONZERO_LOCATION_ERR: 1756 case MAT_USE_INODES: 1757 case MAT_IGNORE_ZERO_ENTRIES: 1758 MatCheckPreallocated(A,1); 1759 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1760 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1761 break; 1762 case MAT_ROW_ORIENTED: 1763 a->roworiented = flg; 1764 1765 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1766 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1767 break; 1768 case MAT_NEW_DIAGONALS: 1769 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1770 break; 1771 case MAT_IGNORE_OFF_PROC_ENTRIES: 1772 a->donotstash = flg; 1773 break; 1774 case MAT_SPD: 1775 A->spd_set = PETSC_TRUE; 1776 A->spd = flg; 1777 if (flg) { 
1778 A->symmetric = PETSC_TRUE; 1779 A->structurally_symmetric = PETSC_TRUE; 1780 A->symmetric_set = PETSC_TRUE; 1781 A->structurally_symmetric_set = PETSC_TRUE; 1782 } 1783 break; 1784 case MAT_SYMMETRIC: 1785 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1786 break; 1787 case MAT_STRUCTURALLY_SYMMETRIC: 1788 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1789 break; 1790 case MAT_HERMITIAN: 1791 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1792 break; 1793 case MAT_SYMMETRY_ETERNAL: 1794 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1795 break; 1796 default: 1797 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1798 } 1799 PetscFunctionReturn(0); 1800 } 1801 1802 #undef __FUNCT__ 1803 #define __FUNCT__ "MatGetRow_MPIAIJ" 1804 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1805 { 1806 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1807 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1808 PetscErrorCode ierr; 1809 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1810 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1811 PetscInt *cmap,*idx_p; 1812 1813 PetscFunctionBegin; 1814 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1815 mat->getrowactive = PETSC_TRUE; 1816 1817 if (!mat->rowvalues && (idx || v)) { 1818 /* 1819 allocate enough space to hold information from the longest row. 
1820 */ 1821 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1822 PetscInt max = 1,tmp; 1823 for (i=0; i<matin->rmap->n; i++) { 1824 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1825 if (max < tmp) max = tmp; 1826 } 1827 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1828 } 1829 1830 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1831 lrow = row - rstart; 1832 1833 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1834 if (!v) {pvA = 0; pvB = 0;} 1835 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1836 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1837 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1838 nztot = nzA + nzB; 1839 1840 cmap = mat->garray; 1841 if (v || idx) { 1842 if (nztot) { 1843 /* Sort by increasing column numbers, assuming A and B already sorted */ 1844 PetscInt imark = -1; 1845 if (v) { 1846 *v = v_p = mat->rowvalues; 1847 for (i=0; i<nzB; i++) { 1848 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1849 else break; 1850 } 1851 imark = i; 1852 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1853 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1854 } 1855 if (idx) { 1856 *idx = idx_p = mat->rowindices; 1857 if (imark > -1) { 1858 for (i=0; i<imark; i++) { 1859 idx_p[i] = cmap[cworkB[i]]; 1860 } 1861 } else { 1862 for (i=0; i<nzB; i++) { 1863 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1864 else break; 1865 } 1866 imark = i; 1867 } 1868 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1869 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1870 } 1871 } else { 1872 if (idx) *idx = 0; 1873 if (v) *v = 0; 1874 } 1875 } 1876 *nz = nztot; 1877 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1878 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1879 PetscFunctionReturn(0); 1880 } 1881 1882 #undef 
__FUNCT__ 1883 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1884 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1885 { 1886 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1887 1888 PetscFunctionBegin; 1889 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1890 aij->getrowactive = PETSC_FALSE; 1891 PetscFunctionReturn(0); 1892 } 1893 1894 #undef __FUNCT__ 1895 #define __FUNCT__ "MatNorm_MPIAIJ" 1896 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1897 { 1898 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1899 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1900 PetscErrorCode ierr; 1901 PetscInt i,j,cstart = mat->cmap->rstart; 1902 PetscReal sum = 0.0; 1903 MatScalar *v; 1904 1905 PetscFunctionBegin; 1906 if (aij->size == 1) { 1907 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1908 } else { 1909 if (type == NORM_FROBENIUS) { 1910 v = amat->a; 1911 for (i=0; i<amat->nz; i++) { 1912 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1913 } 1914 v = bmat->a; 1915 for (i=0; i<bmat->nz; i++) { 1916 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1917 } 1918 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1919 *norm = PetscSqrtReal(*norm); 1920 } else if (type == NORM_1) { /* max column norm */ 1921 PetscReal *tmp,*tmp2; 1922 PetscInt *jj,*garray = aij->garray; 1923 ierr = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr); 1924 ierr = PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr); 1925 *norm = 0.0; 1926 v = amat->a; jj = amat->j; 1927 for (j=0; j<amat->nz; j++) { 1928 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1929 } 1930 v = bmat->a; jj = bmat->j; 1931 for (j=0; j<bmat->nz; j++) { 1932 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1933 } 1934 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1935 
for (j=0; j<mat->cmap->N; j++) { 1936 if (tmp2[j] > *norm) *norm = tmp2[j]; 1937 } 1938 ierr = PetscFree(tmp);CHKERRQ(ierr); 1939 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1940 } else if (type == NORM_INFINITY) { /* max row norm */ 1941 PetscReal ntemp = 0.0; 1942 for (j=0; j<aij->A->rmap->n; j++) { 1943 v = amat->a + amat->i[j]; 1944 sum = 0.0; 1945 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1946 sum += PetscAbsScalar(*v); v++; 1947 } 1948 v = bmat->a + bmat->i[j]; 1949 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1950 sum += PetscAbsScalar(*v); v++; 1951 } 1952 if (sum > ntemp) ntemp = sum; 1953 } 1954 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1955 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1956 } 1957 PetscFunctionReturn(0); 1958 } 1959 1960 #undef __FUNCT__ 1961 #define __FUNCT__ "MatTranspose_MPIAIJ" 1962 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1963 { 1964 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1965 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1966 PetscErrorCode ierr; 1967 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1968 PetscInt cstart = A->cmap->rstart,ncol; 1969 Mat B; 1970 MatScalar *array; 1971 1972 PetscFunctionBegin; 1973 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1974 1975 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1976 ai = Aloc->i; aj = Aloc->j; 1977 bi = Bloc->i; bj = Bloc->j; 1978 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1979 PetscInt *d_nnz,*g_nnz,*o_nnz; 1980 PetscSFNode *oloc; 1981 PETSC_UNUSED PetscSF sf; 1982 1983 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1984 /* compute d_nnz for preallocation */ 1985 ierr = 
PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1986 for (i=0; i<ai[ma]; i++) { 1987 d_nnz[aj[i]]++; 1988 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1989 } 1990 /* compute local off-diagonal contributions */ 1991 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1992 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1993 /* map those to global */ 1994 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1995 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1996 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1997 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1998 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1999 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2000 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2001 2002 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2003 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2004 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 2005 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2006 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2007 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2008 } else { 2009 B = *matout; 2010 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2011 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2012 } 2013 2014 /* copy over the A part */ 2015 array = Aloc->a; 2016 row = A->rmap->rstart; 2017 for (i=0; i<ma; i++) { 2018 ncol = ai[i+1]-ai[i]; 2019 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2020 row++; 2021 array += ncol; aj += ncol; 2022 } 2023 aj = Aloc->j; 2024 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2025 2026 /* copy over the B part */ 2027 ierr = 
PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 2028 array = Bloc->a; 2029 row = A->rmap->rstart; 2030 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2031 cols_tmp = cols; 2032 for (i=0; i<mb; i++) { 2033 ncol = bi[i+1]-bi[i]; 2034 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2035 row++; 2036 array += ncol; cols_tmp += ncol; 2037 } 2038 ierr = PetscFree(cols);CHKERRQ(ierr); 2039 2040 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2041 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2042 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2043 *matout = B; 2044 } else { 2045 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2046 } 2047 PetscFunctionReturn(0); 2048 } 2049 2050 #undef __FUNCT__ 2051 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2052 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2053 { 2054 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2055 Mat a = aij->A,b = aij->B; 2056 PetscErrorCode ierr; 2057 PetscInt s1,s2,s3; 2058 2059 PetscFunctionBegin; 2060 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2061 if (rr) { 2062 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2063 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2064 /* Overlap communication with computation. 
*/ 2065 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2066 } 2067 if (ll) { 2068 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2069 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2070 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2071 } 2072 /* scale the diagonal block */ 2073 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2074 2075 if (rr) { 2076 /* Do a scatter end and then right scale the off-diagonal block */ 2077 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2078 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2079 } 2080 PetscFunctionReturn(0); 2081 } 2082 2083 #undef __FUNCT__ 2084 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2085 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2086 { 2087 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2088 PetscErrorCode ierr; 2089 2090 PetscFunctionBegin; 2091 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2092 PetscFunctionReturn(0); 2093 } 2094 2095 #undef __FUNCT__ 2096 #define __FUNCT__ "MatEqual_MPIAIJ" 2097 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2098 { 2099 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2100 Mat a,b,c,d; 2101 PetscBool flg; 2102 PetscErrorCode ierr; 2103 2104 PetscFunctionBegin; 2105 a = matA->A; b = matA->B; 2106 c = matB->A; d = matB->B; 2107 2108 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2109 if (flg) { 2110 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2111 } 2112 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2113 PetscFunctionReturn(0); 2114 } 2115 2116 #undef __FUNCT__ 2117 #define __FUNCT__ "MatCopy_MPIAIJ" 2118 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2119 { 2120 PetscErrorCode ierr; 2121 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2122 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2123 2124 PetscFunctionBegin; 2125 /* If the two matrices 
don't have the same copy implementation, they aren't compatible for fast copy. */ 2126 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2127 /* because of the column compression in the off-processor part of the matrix a->B, 2128 the number of columns in a->B and b->B may be different, hence we cannot call 2129 the MatCopy() directly on the two parts. If need be, we can provide a more 2130 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2131 then copying the submatrices */ 2132 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2133 } else { 2134 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2135 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2136 } 2137 PetscFunctionReturn(0); 2138 } 2139 2140 #undef __FUNCT__ 2141 #define __FUNCT__ "MatSetUp_MPIAIJ" 2142 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2143 { 2144 PetscErrorCode ierr; 2145 2146 PetscFunctionBegin; 2147 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2148 PetscFunctionReturn(0); 2149 } 2150 2151 #undef __FUNCT__ 2152 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2153 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2154 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2155 { 2156 PetscInt i,m=Y->rmap->N; 2157 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2158 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2159 const PetscInt *xi = x->i,*yi = y->i; 2160 2161 PetscFunctionBegin; 2162 /* Set the number of nonzeros in the new matrix */ 2163 for (i=0; i<m; i++) { 2164 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2165 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2166 nnz[i] = 0; 2167 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2168 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2169 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2170 nnz[i]++; 2171 
} 2172 for (; k<nzy; k++) nnz[i]++; 2173 } 2174 PetscFunctionReturn(0); 2175 } 2176 2177 #undef __FUNCT__ 2178 #define __FUNCT__ "MatAXPY_MPIAIJ" 2179 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2180 { 2181 PetscErrorCode ierr; 2182 PetscInt i; 2183 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2184 PetscBLASInt bnz,one=1; 2185 Mat_SeqAIJ *x,*y; 2186 2187 PetscFunctionBegin; 2188 if (str == SAME_NONZERO_PATTERN) { 2189 PetscScalar alpha = a; 2190 x = (Mat_SeqAIJ*)xx->A->data; 2191 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2192 y = (Mat_SeqAIJ*)yy->A->data; 2193 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2194 x = (Mat_SeqAIJ*)xx->B->data; 2195 y = (Mat_SeqAIJ*)yy->B->data; 2196 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2197 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2198 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2199 } else if (str == SUBSET_NONZERO_PATTERN) { 2200 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2201 2202 x = (Mat_SeqAIJ*)xx->B->data; 2203 y = (Mat_SeqAIJ*)yy->B->data; 2204 if (y->xtoy && y->XtoY != xx->B) { 2205 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2206 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2207 } 2208 if (!y->xtoy) { /* get xtoy */ 2209 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2210 y->XtoY = xx->B; 2211 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2212 } 2213 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2214 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2215 } else { 2216 Mat B; 2217 PetscInt *nnz_d,*nnz_o; 2218 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2219 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2220 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2221 ierr = 
PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2222 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2223 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2224 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2225 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2226 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2227 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2228 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2229 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2230 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2231 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2232 } 2233 PetscFunctionReturn(0); 2234 } 2235 2236 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2237 2238 #undef __FUNCT__ 2239 #define __FUNCT__ "MatConjugate_MPIAIJ" 2240 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2241 { 2242 #if defined(PETSC_USE_COMPLEX) 2243 PetscErrorCode ierr; 2244 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2245 2246 PetscFunctionBegin; 2247 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2248 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2249 #else 2250 PetscFunctionBegin; 2251 #endif 2252 PetscFunctionReturn(0); 2253 } 2254 2255 #undef __FUNCT__ 2256 #define __FUNCT__ "MatRealPart_MPIAIJ" 2257 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2258 { 2259 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2260 PetscErrorCode ierr; 2261 2262 PetscFunctionBegin; 2263 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2264 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2265 PetscFunctionReturn(0); 2266 } 2267 2268 #undef __FUNCT__ 2269 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2270 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2271 { 2272 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2273 PetscErrorCode ierr; 2274 2275 PetscFunctionBegin; 2276 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2277 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2278 PetscFunctionReturn(0); 
2279 } 2280 2281 #if defined(PETSC_HAVE_PBGL) 2282 2283 #include <boost/parallel/mpi/bsp_process_group.hpp> 2284 #include <boost/graph/distributed/ilu_default_graph.hpp> 2285 #include <boost/graph/distributed/ilu_0_block.hpp> 2286 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2287 #include <boost/graph/distributed/petsc/interface.hpp> 2288 #include <boost/multi_array.hpp> 2289 #include <boost/parallel/distributed_property_map->hpp> 2290 2291 #undef __FUNCT__ 2292 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2293 /* 2294 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2295 */ 2296 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2297 { 2298 namespace petsc = boost::distributed::petsc; 2299 2300 namespace graph_dist = boost::graph::distributed; 2301 using boost::graph::distributed::ilu_default::process_group_type; 2302 using boost::graph::ilu_permuted; 2303 2304 PetscBool row_identity, col_identity; 2305 PetscContainer c; 2306 PetscInt m, n, M, N; 2307 PetscErrorCode ierr; 2308 2309 PetscFunctionBegin; 2310 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2311 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2312 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2313 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2314 2315 process_group_type pg; 2316 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2317 lgraph_type *lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2318 lgraph_type& level_graph = *lgraph_p; 2319 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2320 2321 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2322 ilu_permuted(level_graph); 2323 2324 /* put together the new matrix */ 2325 ierr = 
MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr); 2326 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2327 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2328 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2329 ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr); 2330 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2331 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2332 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2333 2334 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c); 2335 ierr = PetscContainerSetPointer(c, lgraph_p); 2336 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2337 ierr = PetscContainerDestroy(&c); 2338 PetscFunctionReturn(0); 2339 } 2340 2341 #undef __FUNCT__ 2342 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2343 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2344 { 2345 PetscFunctionBegin; 2346 PetscFunctionReturn(0); 2347 } 2348 2349 #undef __FUNCT__ 2350 #define __FUNCT__ "MatSolve_MPIAIJ" 2351 /* 2352 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2353 */ 2354 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2355 { 2356 namespace graph_dist = boost::graph::distributed; 2357 2358 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2359 lgraph_type *lgraph_p; 2360 PetscContainer c; 2361 PetscErrorCode ierr; 2362 2363 PetscFunctionBegin; 2364 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr); 2365 ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr); 2366 ierr = VecCopy(b, x);CHKERRQ(ierr); 2367 2368 PetscScalar *array_x; 2369 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2370 PetscInt sx; 2371 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2372 2373 PetscScalar *array_b; 2374 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2375 PetscInt sb; 2376 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2377 2378 
lgraph_type& level_graph = *lgraph_p; 2379 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2380 2381 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2382 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]); 2383 array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]); 2384 2385 typedef boost::iterator_property_map<array_ref_type::iterator, 2386 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2387 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)); 2388 gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2389 2390 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2391 PetscFunctionReturn(0); 2392 } 2393 #endif 2394 2395 2396 #undef __FUNCT__ 2397 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced" 2398 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2399 { 2400 PetscMPIInt rank,size; 2401 MPI_Comm comm; 2402 PetscErrorCode ierr; 2403 PetscInt nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N; 2404 PetscMPIInt *send_rank= NULL,*recv_rank=NULL,subrank,subsize; 2405 PetscInt *rowrange = mat->rmap->range; 2406 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2407 Mat A = aij->A,B=aij->B,C=*matredundant; 2408 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2409 PetscScalar *sbuf_a; 2410 PetscInt nzlocal=a->nz+b->nz; 2411 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2412 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray; 2413 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2414 MatScalar *aworkA,*aworkB; 2415 PetscScalar *vals; 2416 PetscMPIInt tag1,tag2,tag3,imdex; 2417 MPI_Request *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL; 2418 MPI_Request *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL; 2419 MPI_Status recv_status,*send_status; 2420 PetscInt 
*sbuf_nz=NULL,*rbuf_nz=NULL,count; 2421 PetscInt **rbuf_j=NULL; 2422 PetscScalar **rbuf_a=NULL; 2423 Mat_Redundant *redund =NULL; 2424 2425 PetscFunctionBegin; 2426 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2427 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2428 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2429 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2430 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2431 2432 if (reuse == MAT_REUSE_MATRIX) { 2433 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2434 if (subsize == 1) { 2435 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2436 redund = c->redundant; 2437 } else { 2438 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2439 redund = c->redundant; 2440 } 2441 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2442 2443 nsends = redund->nsends; 2444 nrecvs = redund->nrecvs; 2445 send_rank = redund->send_rank; 2446 recv_rank = redund->recv_rank; 2447 sbuf_nz = redund->sbuf_nz; 2448 rbuf_nz = redund->rbuf_nz; 2449 sbuf_j = redund->sbuf_j; 2450 sbuf_a = redund->sbuf_a; 2451 rbuf_j = redund->rbuf_j; 2452 rbuf_a = redund->rbuf_a; 2453 } 2454 2455 if (reuse == MAT_INITIAL_MATRIX) { 2456 PetscInt nleftover,np_subcomm; 2457 2458 /* get the destination processors' id send_rank, nsends and nrecvs */ 2459 ierr = PetscMalloc2(size,&send_rank,size,&recv_rank);CHKERRQ(ierr); 2460 2461 np_subcomm = size/nsubcomm; 2462 nleftover = size - nsubcomm*np_subcomm; 2463 2464 /* block of codes below is specific for INTERLACED */ 2465 /* ------------------------------------------------*/ 2466 nsends = 0; nrecvs = 0; 2467 for (i=0; i<size; i++) { 2468 if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */ 2469 send_rank[nsends++] = i; 2470 recv_rank[nrecvs++] = i; 2471 } 2472 } 2473 if (rank >= size - nleftover) { /* this proc is a leftover processor */ 
2474 i = size-nleftover-1; 2475 j = 0; 2476 while (j < nsubcomm - nleftover) { 2477 send_rank[nsends++] = i; 2478 i--; j++; 2479 } 2480 } 2481 2482 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 2483 for (i=0; i<nleftover; i++) { 2484 recv_rank[nrecvs++] = size-nleftover+i; 2485 } 2486 } 2487 /*----------------------------------------------*/ 2488 2489 /* allocate sbuf_j, sbuf_a */ 2490 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2491 ierr = PetscMalloc1(i,&sbuf_j);CHKERRQ(ierr); 2492 ierr = PetscMalloc1((nzlocal+1),&sbuf_a);CHKERRQ(ierr); 2493 /* 2494 ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr); 2495 ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr); 2496 */ 2497 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2498 2499 /* copy mat's local entries into the buffers */ 2500 if (reuse == MAT_INITIAL_MATRIX) { 2501 rownz_max = 0; 2502 rptr = sbuf_j; 2503 cols = sbuf_j + rend-rstart + 1; 2504 vals = sbuf_a; 2505 rptr[0] = 0; 2506 for (i=0; i<rend-rstart; i++) { 2507 row = i + rstart; 2508 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2509 ncols = nzA + nzB; 2510 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2511 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2512 /* load the column indices for this row into cols */ 2513 lwrite = 0; 2514 for (l=0; l<nzB; l++) { 2515 if ((ctmp = bmap[cworkB[l]]) < cstart) { 2516 vals[lwrite] = aworkB[l]; 2517 cols[lwrite++] = ctmp; 2518 } 2519 } 2520 for (l=0; l<nzA; l++) { 2521 vals[lwrite] = aworkA[l]; 2522 cols[lwrite++] = cstart + cworkA[l]; 2523 } 2524 for (l=0; l<nzB; l++) { 2525 if ((ctmp = bmap[cworkB[l]]) >= cend) { 2526 vals[lwrite] = aworkB[l]; 2527 cols[lwrite++] = ctmp; 2528 } 2529 } 2530 vals += ncols; 2531 cols += ncols; 2532 rptr[i+1] = rptr[i] + ncols; 2533 if (rownz_max < ncols) rownz_max = ncols; 2534 } 2535 if (rptr[rend-rstart] != a->nz + b->nz) 
SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2536 } else { /* only copy matrix values into sbuf_a */ 2537 rptr = sbuf_j; 2538 vals = sbuf_a; 2539 rptr[0] = 0; 2540 for (i=0; i<rend-rstart; i++) { 2541 row = i + rstart; 2542 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2543 ncols = nzA + nzB; 2544 cworkB = b->j + b->i[i]; 2545 aworkA = a->a + a->i[i]; 2546 aworkB = b->a + b->i[i]; 2547 lwrite = 0; 2548 for (l=0; l<nzB; l++) { 2549 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2550 } 2551 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2552 for (l=0; l<nzB; l++) { 2553 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2554 } 2555 vals += ncols; 2556 rptr[i+1] = rptr[i] + ncols; 2557 } 2558 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2559 2560 /* send nzlocal to others, and recv other's nzlocal */ 2561 /*--------------------------------------------------*/ 2562 if (reuse == MAT_INITIAL_MATRIX) { 2563 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr); 2564 2565 s_waits2 = s_waits3 + nsends; 2566 s_waits1 = s_waits2 + nsends; 2567 r_waits1 = s_waits1 + nsends; 2568 r_waits2 = r_waits1 + nrecvs; 2569 r_waits3 = r_waits2 + nrecvs; 2570 } else { 2571 ierr = PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr); 2572 2573 r_waits3 = s_waits3 + nsends; 2574 } 2575 2576 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2577 if (reuse == MAT_INITIAL_MATRIX) { 2578 /* get new tags to keep the communication clean */ 2579 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2580 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2581 ierr = PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);CHKERRQ(ierr); 2582 2583 /* post receives of other's nzlocal */ 2584 for (i=0; i<nrecvs; i++) { 2585 ierr = 
MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2586 } 2587 /* send nzlocal to others */ 2588 for (i=0; i<nsends; i++) { 2589 sbuf_nz[i] = nzlocal; 2590 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2591 } 2592 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2593 count = nrecvs; 2594 while (count) { 2595 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2596 2597 recv_rank[imdex] = recv_status.MPI_SOURCE; 2598 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2599 ierr = PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);CHKERRQ(ierr); 2600 2601 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2602 2603 rbuf_nz[imdex] += i + 2; 2604 2605 ierr = PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);CHKERRQ(ierr); 2606 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2607 count--; 2608 } 2609 /* wait on sends of nzlocal */ 2610 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2611 /* send mat->i,j to others, and recv from other's */ 2612 /*------------------------------------------------*/ 2613 for (i=0; i<nsends; i++) { 2614 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2615 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2616 } 2617 /* wait on receives of mat->i,j */ 2618 /*------------------------------*/ 2619 count = nrecvs; 2620 while (count) { 2621 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2622 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2623 count--; 2624 } 2625 /* wait on sends of mat->i,j */ 2626 /*---------------------------*/ 2627 if (nsends) { 2628 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 
2629 } 2630 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2631 2632 /* post receives, send and receive mat->a */ 2633 /*----------------------------------------*/ 2634 for (imdex=0; imdex<nrecvs; imdex++) { 2635 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2636 } 2637 for (i=0; i<nsends; i++) { 2638 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2639 } 2640 count = nrecvs; 2641 while (count) { 2642 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2643 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2644 count--; 2645 } 2646 if (nsends) { 2647 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2648 } 2649 2650 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2651 2652 /* create redundant matrix */ 2653 /*-------------------------*/ 2654 if (reuse == MAT_INITIAL_MATRIX) { 2655 const PetscInt *range; 2656 PetscInt rstart_sub,rend_sub,mloc_sub; 2657 2658 /* compute rownz_max for preallocation */ 2659 for (imdex=0; imdex<nrecvs; imdex++) { 2660 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2661 rptr = rbuf_j[imdex]; 2662 for (i=0; i<j; i++) { 2663 ncols = rptr[i+1] - rptr[i]; 2664 if (rownz_max < ncols) rownz_max = ncols; 2665 } 2666 } 2667 2668 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2669 2670 /* get local size of redundant matrix 2671 - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED, works for other types, but may not efficient! 
*/ 2672 ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr); 2673 rstart_sub = range[nsubcomm*subrank]; 2674 if (subrank+1 < subsize) { /* not the last proc in subcomm */ 2675 rend_sub = range[nsubcomm*(subrank+1)]; 2676 } else { 2677 rend_sub = mat->rmap->N; 2678 } 2679 mloc_sub = rend_sub - rstart_sub; 2680 2681 if (M == N) { 2682 ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2683 } else { /* non-square matrix */ 2684 ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr); 2685 } 2686 ierr = MatSetBlockSizesFromMats(C,mat,mat);CHKERRQ(ierr); 2687 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2688 ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr); 2689 ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr); 2690 } else { 2691 C = *matredundant; 2692 } 2693 2694 /* insert local matrix entries */ 2695 rptr = sbuf_j; 2696 cols = sbuf_j + rend-rstart + 1; 2697 vals = sbuf_a; 2698 for (i=0; i<rend-rstart; i++) { 2699 row = i + rstart; 2700 ncols = rptr[i+1] - rptr[i]; 2701 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2702 vals += ncols; 2703 cols += ncols; 2704 } 2705 /* insert received matrix entries */ 2706 for (imdex=0; imdex<nrecvs; imdex++) { 2707 rstart = rowrange[recv_rank[imdex]]; 2708 rend = rowrange[recv_rank[imdex]+1]; 2709 /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */ 2710 rptr = rbuf_j[imdex]; 2711 cols = rbuf_j[imdex] + rend-rstart + 1; 2712 vals = rbuf_a[imdex]; 2713 for (i=0; i<rend-rstart; i++) { 2714 row = i + rstart; 2715 ncols = rptr[i+1] - rptr[i]; 2716 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2717 vals += ncols; 2718 cols += ncols; 2719 } 2720 } 2721 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2722 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2723 2724 if (reuse == MAT_INITIAL_MATRIX) { 2725 *matredundant = C; 2726 2727 /* create a 
supporting struct and attach it to C for reuse */ 2728 ierr = PetscNewLog(C,&redund);CHKERRQ(ierr); 2729 if (subsize == 1) { 2730 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2731 c->redundant = redund; 2732 } else { 2733 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2734 c->redundant = redund; 2735 } 2736 2737 redund->nzlocal = nzlocal; 2738 redund->nsends = nsends; 2739 redund->nrecvs = nrecvs; 2740 redund->send_rank = send_rank; 2741 redund->recv_rank = recv_rank; 2742 redund->sbuf_nz = sbuf_nz; 2743 redund->rbuf_nz = rbuf_nz; 2744 redund->sbuf_j = sbuf_j; 2745 redund->sbuf_a = sbuf_a; 2746 redund->rbuf_j = rbuf_j; 2747 redund->rbuf_a = rbuf_a; 2748 redund->psubcomm = NULL; 2749 } 2750 PetscFunctionReturn(0); 2751 } 2752 2753 #undef __FUNCT__ 2754 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2755 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2756 { 2757 PetscErrorCode ierr; 2758 MPI_Comm comm; 2759 PetscMPIInt size,subsize; 2760 PetscInt mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N; 2761 Mat_Redundant *redund=NULL; 2762 PetscSubcomm psubcomm=NULL; 2763 MPI_Comm subcomm_in=subcomm; 2764 Mat *matseq; 2765 IS isrow,iscol; 2766 2767 PetscFunctionBegin; 2768 if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */ 2769 if (reuse == MAT_INITIAL_MATRIX) { 2770 /* create psubcomm, then get subcomm */ 2771 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2772 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2773 if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must between 1 and %D",size); 2774 2775 ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr); 2776 ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr); 2777 ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr); 2778 ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr); 2779 subcomm = psubcomm->comm; 2780 } else { /* retrieve psubcomm and 
subcomm */ 2781 ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr); 2782 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2783 if (subsize == 1) { 2784 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2785 redund = c->redundant; 2786 } else { 2787 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2788 redund = c->redundant; 2789 } 2790 psubcomm = redund->psubcomm; 2791 } 2792 if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) { 2793 ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr); 2794 if (reuse == MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_Redundant() */ 2795 ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr); 2796 if (subsize == 1) { 2797 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2798 c->redundant->psubcomm = psubcomm; 2799 } else { 2800 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2801 c->redundant->psubcomm = psubcomm ; 2802 } 2803 } 2804 PetscFunctionReturn(0); 2805 } 2806 } 2807 2808 /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */ 2809 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2810 if (reuse == MAT_INITIAL_MATRIX) { 2811 /* create a local sequential matrix matseq[0] */ 2812 mloc_sub = PETSC_DECIDE; 2813 ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr); 2814 ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr); 2815 rstart = rend - mloc_sub; 2816 ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr); 2817 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr); 2818 } else { /* reuse == MAT_REUSE_MATRIX */ 2819 if (subsize == 1) { 2820 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2821 redund = c->redundant; 2822 } else { 2823 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2824 redund = c->redundant; 2825 } 2826 2827 isrow = redund->isrow; 2828 iscol = 
redund->iscol; 2829 matseq = redund->matseq; 2830 } 2831 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr); 2832 ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr); 2833 2834 if (reuse == MAT_INITIAL_MATRIX) { 2835 /* create a supporting struct and attach it to C for reuse */ 2836 ierr = PetscNewLog(*matredundant,&redund);CHKERRQ(ierr); 2837 if (subsize == 1) { 2838 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2839 c->redundant = redund; 2840 } else { 2841 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2842 c->redundant = redund; 2843 } 2844 redund->isrow = isrow; 2845 redund->iscol = iscol; 2846 redund->matseq = matseq; 2847 redund->psubcomm = psubcomm; 2848 } 2849 PetscFunctionReturn(0); 2850 } 2851 2852 #undef __FUNCT__ 2853 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2854 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2855 { 2856 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2857 PetscErrorCode ierr; 2858 PetscInt i,*idxb = 0; 2859 PetscScalar *va,*vb; 2860 Vec vtmp; 2861 2862 PetscFunctionBegin; 2863 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2864 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2865 if (idx) { 2866 for (i=0; i<A->rmap->n; i++) { 2867 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2868 } 2869 } 2870 2871 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2872 if (idx) { 2873 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2874 } 2875 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2876 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2877 2878 for (i=0; i<A->rmap->n; i++) { 2879 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2880 va[i] = vb[i]; 2881 if (idx) idx[i] = a->garray[idxb[i]]; 2882 } 2883 } 2884 2885 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2886 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2887 ierr = PetscFree(idxb);CHKERRQ(ierr); 2888 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2889 
PetscFunctionReturn(0); 2890 } 2891 2892 #undef __FUNCT__ 2893 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2894 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2895 { 2896 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2897 PetscErrorCode ierr; 2898 PetscInt i,*idxb = 0; 2899 PetscScalar *va,*vb; 2900 Vec vtmp; 2901 2902 PetscFunctionBegin; 2903 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2904 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2905 if (idx) { 2906 for (i=0; i<A->cmap->n; i++) { 2907 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2908 } 2909 } 2910 2911 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2912 if (idx) { 2913 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2914 } 2915 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2916 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2917 2918 for (i=0; i<A->rmap->n; i++) { 2919 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2920 va[i] = vb[i]; 2921 if (idx) idx[i] = a->garray[idxb[i]]; 2922 } 2923 } 2924 2925 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2926 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2927 ierr = PetscFree(idxb);CHKERRQ(ierr); 2928 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2929 PetscFunctionReturn(0); 2930 } 2931 2932 #undef __FUNCT__ 2933 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2934 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2935 { 2936 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2937 PetscInt n = A->rmap->n; 2938 PetscInt cstart = A->cmap->rstart; 2939 PetscInt *cmap = mat->garray; 2940 PetscInt *diagIdx, *offdiagIdx; 2941 Vec diagV, offdiagV; 2942 PetscScalar *a, *diagA, *offdiagA; 2943 PetscInt r; 2944 PetscErrorCode ierr; 2945 2946 PetscFunctionBegin; 2947 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2948 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2949 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 2950 ierr = 
MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2951 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2952 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2953 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2954 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2955 for (r = 0; r < n; ++r) { 2956 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2957 a[r] = diagA[r]; 2958 idx[r] = cstart + diagIdx[r]; 2959 } else { 2960 a[r] = offdiagA[r]; 2961 idx[r] = cmap[offdiagIdx[r]]; 2962 } 2963 } 2964 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2965 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2966 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2967 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2968 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2969 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2970 PetscFunctionReturn(0); 2971 } 2972 2973 #undef __FUNCT__ 2974 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2975 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2976 { 2977 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2978 PetscInt n = A->rmap->n; 2979 PetscInt cstart = A->cmap->rstart; 2980 PetscInt *cmap = mat->garray; 2981 PetscInt *diagIdx, *offdiagIdx; 2982 Vec diagV, offdiagV; 2983 PetscScalar *a, *diagA, *offdiagA; 2984 PetscInt r; 2985 PetscErrorCode ierr; 2986 2987 PetscFunctionBegin; 2988 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2989 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2990 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2991 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2992 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2993 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2994 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2995 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2996 for (r = 0; r < n; ++r) { 2997 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2998 a[r] = diagA[r]; 2999 idx[r] = 
cstart + diagIdx[r]; 3000 } else { 3001 a[r] = offdiagA[r]; 3002 idx[r] = cmap[offdiagIdx[r]]; 3003 } 3004 } 3005 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3006 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3007 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3008 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3009 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3010 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3011 PetscFunctionReturn(0); 3012 } 3013 3014 #undef __FUNCT__ 3015 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 3016 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 3017 { 3018 PetscErrorCode ierr; 3019 Mat *dummy; 3020 3021 PetscFunctionBegin; 3022 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 3023 *newmat = *dummy; 3024 ierr = PetscFree(dummy);CHKERRQ(ierr); 3025 PetscFunctionReturn(0); 3026 } 3027 3028 #undef __FUNCT__ 3029 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3030 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3031 { 3032 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3033 PetscErrorCode ierr; 3034 3035 PetscFunctionBegin; 3036 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3037 PetscFunctionReturn(0); 3038 } 3039 3040 #undef __FUNCT__ 3041 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3042 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3043 { 3044 PetscErrorCode ierr; 3045 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3046 3047 PetscFunctionBegin; 3048 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3049 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3050 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3051 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3052 PetscFunctionReturn(0); 3053 } 3054 3055 /* -------------------------------------------------------------------*/ 3056 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3057 MatGetRow_MPIAIJ, 3058 
MatRestoreRow_MPIAIJ, 3059 MatMult_MPIAIJ, 3060 /* 4*/ MatMultAdd_MPIAIJ, 3061 MatMultTranspose_MPIAIJ, 3062 MatMultTransposeAdd_MPIAIJ, 3063 #if defined(PETSC_HAVE_PBGL) 3064 MatSolve_MPIAIJ, 3065 #else 3066 0, 3067 #endif 3068 0, 3069 0, 3070 /*10*/ 0, 3071 0, 3072 0, 3073 MatSOR_MPIAIJ, 3074 MatTranspose_MPIAIJ, 3075 /*15*/ MatGetInfo_MPIAIJ, 3076 MatEqual_MPIAIJ, 3077 MatGetDiagonal_MPIAIJ, 3078 MatDiagonalScale_MPIAIJ, 3079 MatNorm_MPIAIJ, 3080 /*20*/ MatAssemblyBegin_MPIAIJ, 3081 MatAssemblyEnd_MPIAIJ, 3082 MatSetOption_MPIAIJ, 3083 MatZeroEntries_MPIAIJ, 3084 /*24*/ MatZeroRows_MPIAIJ, 3085 0, 3086 #if defined(PETSC_HAVE_PBGL) 3087 0, 3088 #else 3089 0, 3090 #endif 3091 0, 3092 0, 3093 /*29*/ MatSetUp_MPIAIJ, 3094 #if defined(PETSC_HAVE_PBGL) 3095 0, 3096 #else 3097 0, 3098 #endif 3099 0, 3100 0, 3101 0, 3102 /*34*/ MatDuplicate_MPIAIJ, 3103 0, 3104 0, 3105 0, 3106 0, 3107 /*39*/ MatAXPY_MPIAIJ, 3108 MatGetSubMatrices_MPIAIJ, 3109 MatIncreaseOverlap_MPIAIJ, 3110 MatGetValues_MPIAIJ, 3111 MatCopy_MPIAIJ, 3112 /*44*/ MatGetRowMax_MPIAIJ, 3113 MatScale_MPIAIJ, 3114 0, 3115 MatDiagonalSet_MPIAIJ, 3116 MatZeroRowsColumns_MPIAIJ, 3117 /*49*/ MatSetRandom_MPIAIJ, 3118 0, 3119 0, 3120 0, 3121 0, 3122 /*54*/ MatFDColoringCreate_MPIXAIJ, 3123 0, 3124 MatSetUnfactored_MPIAIJ, 3125 MatPermute_MPIAIJ, 3126 0, 3127 /*59*/ MatGetSubMatrix_MPIAIJ, 3128 MatDestroy_MPIAIJ, 3129 MatView_MPIAIJ, 3130 0, 3131 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 3132 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 3133 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 3134 0, 3135 0, 3136 0, 3137 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3138 MatGetRowMinAbs_MPIAIJ, 3139 0, 3140 MatSetColoring_MPIAIJ, 3141 0, 3142 MatSetValuesAdifor_MPIAIJ, 3143 /*75*/ MatFDColoringApply_AIJ, 3144 0, 3145 0, 3146 0, 3147 MatFindZeroDiagonals_MPIAIJ, 3148 /*80*/ 0, 3149 0, 3150 0, 3151 /*83*/ MatLoad_MPIAIJ, 3152 0, 3153 0, 3154 0, 3155 0, 3156 0, 3157 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3158 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3159 
MatMatMultNumeric_MPIAIJ_MPIAIJ, 3160 MatPtAP_MPIAIJ_MPIAIJ, 3161 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3162 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 3163 0, 3164 0, 3165 0, 3166 0, 3167 /*99*/ 0, 3168 0, 3169 0, 3170 MatConjugate_MPIAIJ, 3171 0, 3172 /*104*/MatSetValuesRow_MPIAIJ, 3173 MatRealPart_MPIAIJ, 3174 MatImaginaryPart_MPIAIJ, 3175 0, 3176 0, 3177 /*109*/0, 3178 MatGetRedundantMatrix_MPIAIJ, 3179 MatGetRowMin_MPIAIJ, 3180 0, 3181 0, 3182 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3183 0, 3184 0, 3185 0, 3186 0, 3187 /*119*/0, 3188 0, 3189 0, 3190 0, 3191 MatGetMultiProcBlock_MPIAIJ, 3192 /*124*/MatFindNonzeroRows_MPIAIJ, 3193 MatGetColumnNorms_MPIAIJ, 3194 MatInvertBlockDiagonal_MPIAIJ, 3195 0, 3196 MatGetSubMatricesParallel_MPIAIJ, 3197 /*129*/0, 3198 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3199 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3200 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3201 0, 3202 /*134*/0, 3203 0, 3204 0, 3205 0, 3206 0, 3207 /*139*/0, 3208 0, 3209 0, 3210 MatFDColoringSetUp_MPIXAIJ 3211 }; 3212 3213 /* ----------------------------------------------------------------------------------------*/ 3214 3215 #undef __FUNCT__ 3216 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3217 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3218 { 3219 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3220 PetscErrorCode ierr; 3221 3222 PetscFunctionBegin; 3223 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3224 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3225 PetscFunctionReturn(0); 3226 } 3227 3228 #undef __FUNCT__ 3229 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3230 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3231 { 3232 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3233 PetscErrorCode ierr; 3234 3235 PetscFunctionBegin; 3236 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3237 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3238 PetscFunctionReturn(0); 3239 } 3240 3241 #undef __FUNCT__ 3242 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3243 PetscErrorCode 
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIAIJ"
/*
   MatDuplicate_MPIAIJ - Creates a new parallel AIJ matrix modelled on matin.

   Input Parameters:
+  matin    - the matrix to duplicate
-  cpvalues - duplication option, passed unchanged to MatDuplicate() on the two sequential blocks

   Output Parameter:
.  newmat - the new matrix; set to 0 on entry and to the finished duplicate just before returning

   Notes:
   The new matrix is created on matin's communicator with matin's local/global sizes, block
   sizes and type name. Its function table is then overwritten with a copy of matin's, its
   row/column layouts are replaced by references to matin's, and the colmap, garray, lvec,
   Mvctx, A and B members of the Mat_MPIAIJ data are copied or duplicated individually.
*/
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  /* take over matin's complete operation table after the type has been set */
  ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  a       = (Mat_MPIAIJ*)mat->data;

  /* the duplicate is marked assembled and preallocated without going through assembly */
  mat->factortype   = matin->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* MatGetRow() work state is not inherited */
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  /* colmap: a table copy under PETSC_USE_CTABLE, otherwise an array of cmap->N PetscInt */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  /* garray: one entry per column of the off-diagonal block B; len+1 entries are allocated, len are copied */
  if (oldmat->garray) {
    PetscInt len;
    len  = oldmat->B->cmap->n;
    ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
  } else a->garray = 0;

  /* local work vector, scatter context and the two sequential blocks; each is registered as a child of mat for logging */
  ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
  ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  /* carry over the list of functions composed with matin */
  ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
newMat->cmap->N = N; 3374 3375 /* If global sizes are set, check if they are consistent with that given in the file */ 3376 if (sizesset) { 3377 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3378 } 3379 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3380 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3381 3382 /* determine ownership of all (block) rows */ 3383 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3384 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3385 else m = newMat->rmap->n; /* Set by user */ 3386 3387 ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr); 3388 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3389 3390 /* First process needs enough room for process with most rows */ 3391 if (!rank) { 3392 mmax = rowners[1]; 3393 for (i=2; i<=size; i++) { 3394 mmax = PetscMax(mmax, rowners[i]); 3395 } 3396 } else mmax = -1; /* unused, but compilers complain */ 3397 3398 rowners[0] = 0; 3399 for (i=2; i<=size; i++) { 3400 rowners[i] += rowners[i-1]; 3401 } 3402 rstart = rowners[rank]; 3403 rend = rowners[rank+1]; 3404 3405 /* distribute row lengths to all processors */ 3406 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 3407 if (!rank) { 3408 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3409 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 3410 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 3411 for (j=0; j<m; j++) { 3412 procsnz[0] += ourlens[j]; 3413 } 3414 for (i=1; i<size; i++) { 3415 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3416 /* calculate the number of 
nonzeros on each processor */ 3417 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3418 procsnz[i] += rowlengths[j]; 3419 } 3420 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3421 } 3422 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3423 } else { 3424 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3425 } 3426 3427 if (!rank) { 3428 /* determine max buffer needed and allocate it */ 3429 maxnz = 0; 3430 for (i=0; i<size; i++) { 3431 maxnz = PetscMax(maxnz,procsnz[i]); 3432 } 3433 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 3434 3435 /* read in my part of the matrix column indices */ 3436 nz = procsnz[0]; 3437 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3438 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3439 3440 /* read in every one elses and ship off */ 3441 for (i=1; i<size; i++) { 3442 nz = procsnz[i]; 3443 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3444 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3445 } 3446 ierr = PetscFree(cols);CHKERRQ(ierr); 3447 } else { 3448 /* determine buffer space needed for message */ 3449 nz = 0; 3450 for (i=0; i<m; i++) { 3451 nz += ourlens[i]; 3452 } 3453 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3454 3455 /* receive message of column indices*/ 3456 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3457 } 3458 3459 /* determine column ownership if matrix is not square */ 3460 if (N != M) { 3461 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3462 else n = newMat->cmap->n; 3463 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3464 cstart = cend - n; 3465 } else { 3466 cstart = rstart; 3467 cend = rend; 3468 n = cend - cstart; 3469 } 3470 3471 /* loop over local rows, determining number of off diagonal entries */ 3472 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3473 jj = 0; 3474 for (i=0; i<m; i++) { 3475 for (j=0; j<ourlens[i]; j++) { 3476 if 
(mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3477 jj++; 3478 } 3479 } 3480 3481 for (i=0; i<m; i++) { 3482 ourlens[i] -= offlens[i]; 3483 } 3484 if (!sizesset) { 3485 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3486 } 3487 3488 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3489 3490 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3491 3492 for (i=0; i<m; i++) { 3493 ourlens[i] += offlens[i]; 3494 } 3495 3496 if (!rank) { 3497 ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr); 3498 3499 /* read in my part of the matrix numerical values */ 3500 nz = procsnz[0]; 3501 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3502 3503 /* insert into matrix */ 3504 jj = rstart; 3505 smycols = mycols; 3506 svals = vals; 3507 for (i=0; i<m; i++) { 3508 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3509 smycols += ourlens[i]; 3510 svals += ourlens[i]; 3511 jj++; 3512 } 3513 3514 /* read in other processors and ship out */ 3515 for (i=1; i<size; i++) { 3516 nz = procsnz[i]; 3517 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3518 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3519 } 3520 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3521 } else { 3522 /* receive numeric values */ 3523 ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr); 3524 3525 /* receive message of values*/ 3526 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3527 3528 /* insert into matrix */ 3529 jj = rstart; 3530 smycols = mycols; 3531 svals = vals; 3532 for (i=0; i<m; i++) { 3533 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3534 smycols += ourlens[i]; 3535 svals += ourlens[i]; 3536 jj++; 3537 } 3538 } 3539 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3540 ierr = PetscFree(vals);CHKERRQ(ierr); 3541 ierr = PetscFree(mycols);CHKERRQ(ierr); 
3542 ierr = PetscFree(rowners);CHKERRQ(ierr); 3543 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3544 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3545 PetscFunctionReturn(0); 3546 } 3547 3548 #undef __FUNCT__ 3549 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3550 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3551 { 3552 PetscErrorCode ierr; 3553 IS iscol_local; 3554 PetscInt csize; 3555 3556 PetscFunctionBegin; 3557 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3558 if (call == MAT_REUSE_MATRIX) { 3559 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3560 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3561 } else { 3562 PetscInt cbs; 3563 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3564 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3565 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3566 } 3567 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3568 if (call == MAT_INITIAL_MATRIX) { 3569 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3570 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3571 } 3572 PetscFunctionReturn(0); 3573 } 3574 3575 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3576 #undef __FUNCT__ 3577 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3578 /* 3579 Not great since it makes two copies of the submatrix, first an SeqAIJ 3580 in local and then by concatenating the local matrices the end result. 3581 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3582 3583 Note: This requires a sequential iscol with all indices. 
3584 */ 3585 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3586 { 3587 PetscErrorCode ierr; 3588 PetscMPIInt rank,size; 3589 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3590 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3591 PetscBool allcolumns, colflag; 3592 Mat M,Mreuse; 3593 MatScalar *vwork,*aa; 3594 MPI_Comm comm; 3595 Mat_SeqAIJ *aij; 3596 3597 PetscFunctionBegin; 3598 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3599 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3600 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3601 3602 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3603 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3604 if (colflag && ncol == mat->cmap->N) { 3605 allcolumns = PETSC_TRUE; 3606 } else { 3607 allcolumns = PETSC_FALSE; 3608 } 3609 if (call == MAT_REUSE_MATRIX) { 3610 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3611 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3612 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3613 } else { 3614 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3615 } 3616 3617 /* 3618 m - number of local rows 3619 n - number of columns (same on all processors) 3620 rstart - first row in new global matrix generated 3621 */ 3622 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3623 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3624 if (call == MAT_INITIAL_MATRIX) { 3625 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3626 ii = aij->i; 3627 jj = aij->j; 3628 3629 /* 3630 Determine the number of non-zeros in the diagonal and off-diagonal 3631 portions of the matrix in order to do correct preallocation 3632 */ 3633 3634 /* first get start and end of "diagonal" 
columns */ 3635 if (csize == PETSC_DECIDE) { 3636 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3637 if (mglobal == n) { /* square matrix */ 3638 nlocal = m; 3639 } else { 3640 nlocal = n/size + ((n % size) > rank); 3641 } 3642 } else { 3643 nlocal = csize; 3644 } 3645 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3646 rstart = rend - nlocal; 3647 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3648 3649 /* next, compute all the lengths */ 3650 ierr = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr); 3651 olens = dlens + m; 3652 for (i=0; i<m; i++) { 3653 jend = ii[i+1] - ii[i]; 3654 olen = 0; 3655 dlen = 0; 3656 for (j=0; j<jend; j++) { 3657 if (*jj < rstart || *jj >= rend) olen++; 3658 else dlen++; 3659 jj++; 3660 } 3661 olens[i] = olen; 3662 dlens[i] = dlen; 3663 } 3664 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3665 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3666 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3667 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3668 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3669 ierr = PetscFree(dlens);CHKERRQ(ierr); 3670 } else { 3671 PetscInt ml,nl; 3672 3673 M = *newmat; 3674 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3675 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3676 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3677 /* 3678 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3679 rather than the slower MatSetValues(). 
3680 */ 3681 M->was_assembled = PETSC_TRUE; 3682 M->assembled = PETSC_FALSE; 3683 } 3684 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3685 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3686 ii = aij->i; 3687 jj = aij->j; 3688 aa = aij->a; 3689 for (i=0; i<m; i++) { 3690 row = rstart + i; 3691 nz = ii[i+1] - ii[i]; 3692 cwork = jj; jj += nz; 3693 vwork = aa; aa += nz; 3694 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3695 } 3696 3697 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3698 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3699 *newmat = M; 3700 3701 /* save submatrix used in processor for next request */ 3702 if (call == MAT_INITIAL_MATRIX) { 3703 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3704 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3705 } 3706 PetscFunctionReturn(0); 3707 } 3708 3709 #undef __FUNCT__ 3710 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3711 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3712 { 3713 PetscInt m,cstart, cend,j,nnz,i,d; 3714 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3715 const PetscInt *JJ; 3716 PetscScalar *values; 3717 PetscErrorCode ierr; 3718 3719 PetscFunctionBegin; 3720 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3721 3722 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3723 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3724 m = B->rmap->n; 3725 cstart = B->cmap->rstart; 3726 cend = B->cmap->rend; 3727 rstart = B->rmap->rstart; 3728 3729 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3730 3731 #if defined(PETSC_USE_DEBUGGING) 3732 for (i=0; i<m; i++) { 3733 nnz = Ii[i+1]- Ii[i]; 3734 JJ = J + Ii[i]; 3735 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3736 if (nnz && (JJ[0] < 0)) 
SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 3737 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3738 } 3739 #endif 3740 3741 for (i=0; i<m; i++) { 3742 nnz = Ii[i+1]- Ii[i]; 3743 JJ = J + Ii[i]; 3744 nnz_max = PetscMax(nnz_max,nnz); 3745 d = 0; 3746 for (j=0; j<nnz; j++) { 3747 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3748 } 3749 d_nnz[i] = d; 3750 o_nnz[i] = nnz - d; 3751 } 3752 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3753 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3754 3755 if (v) values = (PetscScalar*)v; 3756 else { 3757 ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr); 3758 } 3759 3760 for (i=0; i<m; i++) { 3761 ii = i + rstart; 3762 nnz = Ii[i+1]- Ii[i]; 3763 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3764 } 3765 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3766 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3767 3768 if (!v) { 3769 ierr = PetscFree(values);CHKERRQ(ierr); 3770 } 3771 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3772 PetscFunctionReturn(0); 3773 } 3774 3775 #undef __FUNCT__ 3776 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3777 /*@ 3778 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3779 (the default parallel PETSc format). 3780 3781 Collective on MPI_Comm 3782 3783 Input Parameters: 3784 + B - the matrix 3785 . i - the indices into j for the start of each local row (starts with zero) 3786 . 
j - the column indices for each local row (starts with zero) 3787 - v - optional values in the matrix 3788 3789 Level: developer 3790 3791 Notes: 3792 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3793 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3794 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3795 3796 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3797 3798 The format which is used for the sparse matrix input, is equivalent to a 3799 row-major ordering.. i.e for the following matrix, the input data expected is 3800 as shown: 3801 3802 1 0 0 3803 2 0 3 P0 3804 ------- 3805 4 5 6 P1 3806 3807 Process0 [P0]: rows_owned=[0,1] 3808 i = {0,1,3} [size = nrow+1 = 2+1] 3809 j = {0,0,2} [size = nz = 6] 3810 v = {1,2,3} [size = nz = 6] 3811 3812 Process1 [P1]: rows_owned=[2] 3813 i = {0,3} [size = nrow+1 = 1+1] 3814 j = {0,1,2} [size = nz = 6] 3815 v = {4,5,6} [size = nz = 6] 3816 3817 .keywords: matrix, aij, compressed row, sparse, parallel 3818 3819 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3820 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3821 @*/ 3822 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3823 { 3824 PetscErrorCode ierr; 3825 3826 PetscFunctionBegin; 3827 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3828 PetscFunctionReturn(0); 3829 } 3830 3831 #undef __FUNCT__ 3832 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3833 /*@C 3834 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3835 (the default parallel PETSc format). 
For good matrix assembly performance 3836 the user should preallocate the matrix storage by setting the parameters 3837 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3838 performance can be increased by more than a factor of 50. 3839 3840 Collective on MPI_Comm 3841 3842 Input Parameters: 3843 + B - the matrix 3844 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3845 (same value is used for all local rows) 3846 . d_nnz - array containing the number of nonzeros in the various rows of the 3847 DIAGONAL portion of the local submatrix (possibly different for each row) 3848 or NULL, if d_nz is used to specify the nonzero structure. 3849 The size of this array is equal to the number of local rows, i.e 'm'. 3850 For matrices that will be factored, you must leave room for (and set) 3851 the diagonal entry even if it is zero. 3852 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3853 submatrix (same value is used for all local rows). 3854 - o_nnz - array containing the number of nonzeros in the various rows of the 3855 OFF-DIAGONAL portion of the local submatrix (possibly different for 3856 each row) or NULL, if o_nz is used to specify the nonzero 3857 structure. The size of this array is equal to the number 3858 of local rows, i.e 'm'. 3859 3860 If the *_nnz parameter is given then the *_nz parameter is ignored 3861 3862 The AIJ format (also called the Yale sparse matrix format or 3863 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3864 storage. The stored row and column indices begin with zero. 3865 See Users-Manual: ch_mat for details. 3866 3867 The parallel matrix is partitioned such that the first m0 rows belong to 3868 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3869 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.
3956 3957 Level: intermediate 3958 3959 .keywords: matrix, aij, compressed row, sparse, parallel 3960 3961 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 3962 MPIAIJ, MatGetInfo(), PetscSplitOwnership() 3963 @*/ 3964 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3965 { 3966 PetscErrorCode ierr; 3967 3968 PetscFunctionBegin; 3969 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3970 PetscValidType(B,1); 3971 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3972 PetscFunctionReturn(0); 3973 } 3974 3975 #undef __FUNCT__ 3976 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3977 /*@ 3978 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3979 CSR format the local rows. 3980 3981 Collective on MPI_Comm 3982 3983 Input Parameters: 3984 + comm - MPI communicator 3985 . m - number of local rows (Cannot be PETSC_DECIDE) 3986 . n - This value should be the same as the local size used in creating the 3987 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3988 calculated if N is given) For square matrices n is almost always m. 3989 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3990 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3991 . i - row indices 3992 . j - column indices 3993 - a - matrix values 3994 3995 Output Parameter: 3996 . mat - the matrix 3997 3998 Level: intermediate 3999 4000 Notes: 4001 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4002 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4003 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 
4004 4005 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4006 4007 The format which is used for the sparse matrix input, is equivalent to a 4008 row-major ordering.. i.e for the following matrix, the input data expected is 4009 as shown: 4010 4011 1 0 0 4012 2 0 3 P0 4013 ------- 4014 4 5 6 P1 4015 4016 Process0 [P0]: rows_owned=[0,1] 4017 i = {0,1,3} [size = nrow+1 = 2+1] 4018 j = {0,0,2} [size = nz = 6] 4019 v = {1,2,3} [size = nz = 6] 4020 4021 Process1 [P1]: rows_owned=[2] 4022 i = {0,3} [size = nrow+1 = 1+1] 4023 j = {0,1,2} [size = nz = 6] 4024 v = {4,5,6} [size = nz = 6] 4025 4026 .keywords: matrix, aij, compressed row, sparse, parallel 4027 4028 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4029 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4030 @*/ 4031 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4032 { 4033 PetscErrorCode ierr; 4034 4035 PetscFunctionBegin; 4036 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4037 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4038 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4039 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4040 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4041 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4042 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4043 PetscFunctionReturn(0); 4044 } 4045 4046 #undef __FUNCT__ 4047 #define __FUNCT__ "MatCreateAIJ" 4048 /*@C 4049 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4050 (the default parallel PETSc format). 
For good matrix assembly performance 4051 the user should preallocate the matrix storage by setting the parameters 4052 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4053 performance can be increased by more than a factor of 50. 4054 4055 Collective on MPI_Comm 4056 4057 Input Parameters: 4058 + comm - MPI communicator 4059 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4060 This value should be the same as the local size used in creating the 4061 y vector for the matrix-vector product y = Ax. 4062 . n - This value should be the same as the local size used in creating the 4063 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4064 calculated if N is given) For square matrices n is almost always m. 4065 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4066 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4067 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4068 (same value is used for all local rows) 4069 . d_nnz - array containing the number of nonzeros in the various rows of the 4070 DIAGONAL portion of the local submatrix (possibly different for each row) 4071 or NULL, if d_nz is used to specify the nonzero structure. 4072 The size of this array is equal to the number of local rows, i.e 'm'. 4073 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4074 submatrix (same value is used for all local rows). 4075 - o_nnz - array containing the number of nonzeros in the various rows of the 4076 OFF-DIAGONAL portion of the local submatrix (possibly different for 4077 each row) or NULL, if o_nz is used to specify the nonzero 4078 structure. The size of this array is equal to the number 4079 of local rows, i.e 'm'. 4080 4081 Output Parameter: 4082 . 
A - the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2 etc.. where
   m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
   values corresponding to [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to 0th partition, the next n1 columns belonging to the next
   partition etc.. where n0,n1,n2... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor. i.e diagonal matrix on
   process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below better
   illustrates this concept.
4119 4120 For a square global matrix we define each processor's diagonal portion 4121 to be its local rows and the corresponding columns (a square submatrix); 4122 each processor's off-diagonal portion encompasses the remainder of the 4123 local matrix (a rectangular submatrix). 4124 4125 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4126 4127 When calling this routine with a single process communicator, a matrix of 4128 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4129 type of communicator, use the construction mechanism: 4130 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4131 4132 By default, this format uses inodes (identical nodes) when possible. 4133 We search for consecutive rows with the same nonzero structure, thereby 4134 reusing matrix information to achieve increased efficiency. 4135 4136 Options Database Keys: 4137 + -mat_no_inode - Do not use inodes 4138 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4139 - -mat_aij_oneindex - Internally use indexing starting at 1 4140 rather than 0. Note that when calling MatSetValues(), 4141 the user still MUST index entries starting at 0! 4142 4143 4144 Example usage: 4145 4146 Consider the following 8x8 matrix with 34 non-zero values, that is 4147 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4148 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2.
i.e we are using 12+15+10=37 storage locations to store 4200 34 values. 4201 4202 When d_nnz, o_nnz parameters are specified, the storage is specified 4203 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4204 In the above case the values for d_nnz,o_nnz are: 4205 .vb 4206 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4207 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4208 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4209 .ve 4210 Here the space allocated is sum of all the above values i.e 34, and 4211 hence pre-allocation is perfect. 4212 4213 Level: intermediate 4214 4215 .keywords: matrix, aij, compressed row, sparse, parallel 4216 4217 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4218 MPIAIJ, MatCreateMPIAIJWithArrays() 4219 @*/ 4220 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4221 { 4222 PetscErrorCode ierr; 4223 PetscMPIInt size; 4224 4225 PetscFunctionBegin; 4226 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4227 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4228 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4229 if (size > 1) { 4230 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4231 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4232 } else { 4233 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4234 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4235 } 4236 PetscFunctionReturn(0); 4237 } 4238 4239 #undef __FUNCT__ 4240 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4241 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4242 { 4243 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4244 4245 PetscFunctionBegin; 4246 if (Ad) *Ad = a->A; 4247 if (Ao) *Ao = a->B; 4248 if (colmap) *colmap = a->garray; 4249 PetscFunctionReturn(0); 4250 } 4251 4252 #undef __FUNCT__ 4253 #define __FUNCT__ 
"MatSetColoring_MPIAIJ" 4254 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4255 { 4256 PetscErrorCode ierr; 4257 PetscInt i; 4258 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4259 4260 PetscFunctionBegin; 4261 if (coloring->ctype == IS_COLORING_GLOBAL) { 4262 ISColoringValue *allcolors,*colors; 4263 ISColoring ocoloring; 4264 4265 /* set coloring for diagonal portion */ 4266 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4267 4268 /* set coloring for off-diagonal portion */ 4269 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 4270 ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr); 4271 for (i=0; i<a->B->cmap->n; i++) { 4272 colors[i] = allcolors[a->garray[i]]; 4273 } 4274 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4275 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4276 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4277 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4278 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4279 ISColoringValue *colors; 4280 PetscInt *larray; 4281 ISColoring ocoloring; 4282 4283 /* set coloring for diagonal portion */ 4284 ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr); 4285 for (i=0; i<a->A->cmap->n; i++) { 4286 larray[i] = i + A->cmap->rstart; 4287 } 4288 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 4289 ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr); 4290 for (i=0; i<a->A->cmap->n; i++) { 4291 colors[i] = coloring->colors[larray[i]]; 4292 } 4293 ierr = PetscFree(larray);CHKERRQ(ierr); 4294 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4295 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4296 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4297 4298 /* set coloring for off-diagonal portion */ 
4299 ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr); 4300 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 4301 ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr); 4302 for (i=0; i<a->B->cmap->n; i++) { 4303 colors[i] = coloring->colors[larray[i]]; 4304 } 4305 ierr = PetscFree(larray);CHKERRQ(ierr); 4306 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4307 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4308 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4309 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4310 PetscFunctionReturn(0); 4311 } 4312 4313 #undef __FUNCT__ 4314 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4315 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4316 { 4317 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4318 PetscErrorCode ierr; 4319 4320 PetscFunctionBegin; 4321 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4322 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4323 PetscFunctionReturn(0); 4324 } 4325 4326 #undef __FUNCT__ 4327 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4328 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4329 { 4330 PetscErrorCode ierr; 4331 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4332 PetscInt *indx; 4333 4334 PetscFunctionBegin; 4335 /* This routine will ONLY return MPIAIJ type matrix */ 4336 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4337 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4338 if (n == PETSC_DECIDE) { 4339 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4340 } 4341 /* Check sum(n) = N */ 4342 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4343 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != 
global columns %d",N); 4344 4345 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4346 rstart -= m; 4347 4348 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4349 for (i=0; i<m; i++) { 4350 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4351 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4352 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4353 } 4354 4355 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4356 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4357 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4358 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4359 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4360 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4361 PetscFunctionReturn(0); 4362 } 4363 4364 #undef __FUNCT__ 4365 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4366 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4367 { 4368 PetscErrorCode ierr; 4369 PetscInt m,N,i,rstart,nnz,Ii; 4370 PetscInt *indx; 4371 PetscScalar *values; 4372 4373 PetscFunctionBegin; 4374 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4375 ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr); 4376 for (i=0; i<m; i++) { 4377 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4378 Ii = i + rstart; 4379 ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4380 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4381 } 4382 ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4383 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4384 PetscFunctionReturn(0); 4385 } 4386 4387 #undef __FUNCT__ 4388 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4389 /*@ 4390 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4391 
matrices from each processor 4392 4393 Collective on MPI_Comm 4394 4395 Input Parameters: 4396 + comm - the communicators the parallel matrix will live on 4397 . inmat - the input sequential matrices 4398 . n - number of local columns (or PETSC_DECIDE) 4399 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4400 4401 Output Parameter: 4402 . outmat - the parallel matrix generated 4403 4404 Level: advanced 4405 4406 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4407 4408 @*/ 4409 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4410 { 4411 PetscErrorCode ierr; 4412 PetscMPIInt size; 4413 4414 PetscFunctionBegin; 4415 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4416 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4417 if (size == 1) { 4418 if (scall == MAT_INITIAL_MATRIX) { 4419 ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr); 4420 } else { 4421 ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4422 } 4423 } else { 4424 if (scall == MAT_INITIAL_MATRIX) { 4425 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4426 } 4427 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4428 } 4429 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4430 PetscFunctionReturn(0); 4431 } 4432 4433 #undef __FUNCT__ 4434 #define __FUNCT__ "MatFileSplit" 4435 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4436 { 4437 PetscErrorCode ierr; 4438 PetscMPIInt rank; 4439 PetscInt m,N,i,rstart,nnz; 4440 size_t len; 4441 const PetscInt *indx; 4442 PetscViewer out; 4443 char *name; 4444 Mat B; 4445 const PetscScalar *values; 4446 4447 PetscFunctionBegin; 4448 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4449 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4450 /* Should this be the type of the diagonal block of A? 
*/ 4451 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4452 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4453 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4454 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4455 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4456 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4457 for (i=0; i<m; i++) { 4458 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4459 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4460 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4461 } 4462 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4463 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4464 4465 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4466 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4467 ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr); 4468 sprintf(name,"%s.%d",outfile,rank); 4469 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4470 ierr = PetscFree(name);CHKERRQ(ierr); 4471 ierr = MatView(B,out);CHKERRQ(ierr); 4472 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4473 ierr = MatDestroy(&B);CHKERRQ(ierr); 4474 PetscFunctionReturn(0); 4475 } 4476 4477 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4478 #undef __FUNCT__ 4479 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4480 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4481 { 4482 PetscErrorCode ierr; 4483 Mat_Merge_SeqsToMPI *merge; 4484 PetscContainer container; 4485 4486 PetscFunctionBegin; 4487 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4488 if (container) { 4489 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4490 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4491 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4492 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4493 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4494 ierr = 
PetscFree(merge->bj);CHKERRQ(ierr); 4495 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4496 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4497 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4498 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4499 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4500 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4501 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4502 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4503 ierr = PetscFree(merge);CHKERRQ(ierr); 4504 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4505 } 4506 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4507 PetscFunctionReturn(0); 4508 } 4509 4510 #include <../src/mat/utils/freespace.h> 4511 #include <petscbt.h> 4512 4513 #undef __FUNCT__ 4514 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4515 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4516 { 4517 PetscErrorCode ierr; 4518 MPI_Comm comm; 4519 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4520 PetscMPIInt size,rank,taga,*len_s; 4521 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4522 PetscInt proc,m; 4523 PetscInt **buf_ri,**buf_rj; 4524 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4525 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4526 MPI_Request *s_waits,*r_waits; 4527 MPI_Status *status; 4528 MatScalar *aa=a->a; 4529 MatScalar **abuf_r,*ba_i; 4530 Mat_Merge_SeqsToMPI *merge; 4531 PetscContainer container; 4532 4533 PetscFunctionBegin; 4534 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4535 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4536 4537 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4538 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4539 4540 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4541 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4542 4543 bi = merge->bi; 4544 bj = merge->bj; 4545 buf_ri 
= merge->buf_ri; 4546 buf_rj = merge->buf_rj; 4547 4548 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4549 owners = merge->rowmap->range; 4550 len_s = merge->len_s; 4551 4552 /* send and recv matrix values */ 4553 /*-----------------------------*/ 4554 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4555 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4556 4557 ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr); 4558 for (proc=0,k=0; proc<size; proc++) { 4559 if (!len_s[proc]) continue; 4560 i = owners[proc]; 4561 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4562 k++; 4563 } 4564 4565 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4566 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4567 ierr = PetscFree(status);CHKERRQ(ierr); 4568 4569 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4570 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4571 4572 /* insert mat values of mpimat */ 4573 /*----------------------------*/ 4574 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4575 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4576 4577 for (k=0; k<merge->nrecv; k++) { 4578 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4579 nrows = *(buf_ri_k[k]); 4580 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4581 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4582 } 4583 4584 /* set values of ba */ 4585 m = merge->rowmap->n; 4586 for (i=0; i<m; i++) { 4587 arow = owners[rank] + i; 4588 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4589 bnzi = bi[i+1] - bi[i]; 4590 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4591 4592 /* add local non-zero vals of this proc's seqmat into ba */ 4593 anzi = 
ai[arow+1] - ai[arow]; 4594 aj = a->j + ai[arow]; 4595 aa = a->a + ai[arow]; 4596 nextaj = 0; 4597 for (j=0; nextaj<anzi; j++) { 4598 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4599 ba_i[j] += aa[nextaj++]; 4600 } 4601 } 4602 4603 /* add received vals into ba */ 4604 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4605 /* i-th row */ 4606 if (i == *nextrow[k]) { 4607 anzi = *(nextai[k]+1) - *nextai[k]; 4608 aj = buf_rj[k] + *(nextai[k]); 4609 aa = abuf_r[k] + *(nextai[k]); 4610 nextaj = 0; 4611 for (j=0; nextaj<anzi; j++) { 4612 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4613 ba_i[j] += aa[nextaj++]; 4614 } 4615 } 4616 nextrow[k]++; nextai[k]++; 4617 } 4618 } 4619 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4620 } 4621 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4622 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4623 4624 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4625 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4626 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4627 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4628 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4629 PetscFunctionReturn(0); 4630 } 4631 4632 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4633 4634 #undef __FUNCT__ 4635 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4636 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4637 { 4638 PetscErrorCode ierr; 4639 Mat B_mpi; 4640 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4641 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4642 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4643 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4644 PetscInt len,proc,*dnz,*onz,bs,cbs; 4645 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4646 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4647 MPI_Request 
*si_waits,*sj_waits,*ri_waits,*rj_waits; 4648 MPI_Status *status; 4649 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4650 PetscBT lnkbt; 4651 Mat_Merge_SeqsToMPI *merge; 4652 PetscContainer container; 4653 4654 PetscFunctionBegin; 4655 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4656 4657 /* make sure it is a PETSc comm */ 4658 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4659 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4660 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4661 4662 ierr = PetscNew(&merge);CHKERRQ(ierr); 4663 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4664 4665 /* determine row ownership */ 4666 /*---------------------------------------------------------*/ 4667 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4668 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4669 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4670 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4671 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4672 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4673 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4674 4675 m = merge->rowmap->n; 4676 owners = merge->rowmap->range; 4677 4678 /* determine the number of messages to send, their lengths */ 4679 /*---------------------------------------------------------*/ 4680 len_s = merge->len_s; 4681 4682 len = 0; /* length of buf_si[] */ 4683 merge->nsend = 0; 4684 for (proc=0; proc<size; proc++) { 4685 len_si[proc] = 0; 4686 if (proc == rank) { 4687 len_s[proc] = 0; 4688 } else { 4689 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4690 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4691 } 4692 if (len_s[proc]) { 4693 merge->nsend++; 4694 nrows = 0; 4695 for (i=owners[proc]; i<owners[proc+1]; i++) { 4696 if (ai[i+1] > ai[i]) nrows++; 4697 } 4698 len_si[proc] = 2*(nrows+1); 4699 len += len_si[proc]; 4700 } 4701 } 4702 4703 
/* determine the number and length of messages to receive for ij-structure */ 4704 /*-------------------------------------------------------------------------*/ 4705 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4706 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4707 4708 /* post the Irecv of j-structure */ 4709 /*-------------------------------*/ 4710 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4711 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4712 4713 /* post the Isend of j-structure */ 4714 /*--------------------------------*/ 4715 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4716 4717 for (proc=0, k=0; proc<size; proc++) { 4718 if (!len_s[proc]) continue; 4719 i = owners[proc]; 4720 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4721 k++; 4722 } 4723 4724 /* receives and sends of j-structure are complete */ 4725 /*------------------------------------------------*/ 4726 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4727 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4728 4729 /* send and recv i-structure */ 4730 /*---------------------------*/ 4731 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4732 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4733 4734 ierr = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr); 4735 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4736 for (proc=0,k=0; proc<size; proc++) { 4737 if (!len_s[proc]) continue; 4738 /* form outgoing message for i-structure: 4739 buf_si[0]: nrows to be sent 4740 [1:nrows]: row index (global) 4741 [nrows+1:2*nrows+1]: i-structure index 4742 */ 4743 
/*-------------------------------------------*/ 4744 nrows = len_si[proc]/2 - 1; 4745 buf_si_i = buf_si + nrows+1; 4746 buf_si[0] = nrows; 4747 buf_si_i[0] = 0; 4748 nrows = 0; 4749 for (i=owners[proc]; i<owners[proc+1]; i++) { 4750 anzi = ai[i+1] - ai[i]; 4751 if (anzi) { 4752 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4753 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4754 nrows++; 4755 } 4756 } 4757 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4758 k++; 4759 buf_si += len_si[proc]; 4760 } 4761 4762 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4763 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4764 4765 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4766 for (i=0; i<merge->nrecv; i++) { 4767 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4768 } 4769 4770 ierr = PetscFree(len_si);CHKERRQ(ierr); 4771 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4772 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4773 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4774 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4775 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4776 ierr = PetscFree(status);CHKERRQ(ierr); 4777 4778 /* compute a local seq matrix in each processor */ 4779 /*----------------------------------------------*/ 4780 /* allocate bi array and free space for accumulating nonzero column info */ 4781 ierr = PetscMalloc1((m+1),&bi);CHKERRQ(ierr); 4782 bi[0] = 0; 4783 4784 /* create and initialize a linked list */ 4785 nlnk = N+1; 4786 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4787 4788 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4789 len = ai[owners[rank+1]] - ai[owners[rank]]; 4790 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4791 4792 current_space = free_space; 4793 
4794 /* determine symbolic info for each local row */ 4795 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4796 4797 for (k=0; k<merge->nrecv; k++) { 4798 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4799 nrows = *buf_ri_k[k]; 4800 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4801 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4802 } 4803 4804 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4805 len = 0; 4806 for (i=0; i<m; i++) { 4807 bnzi = 0; 4808 /* add local non-zero cols of this proc's seqmat into lnk */ 4809 arow = owners[rank] + i; 4810 anzi = ai[arow+1] - ai[arow]; 4811 aj = a->j + ai[arow]; 4812 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4813 bnzi += nlnk; 4814 /* add received col data into lnk */ 4815 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4816 if (i == *nextrow[k]) { /* i-th row */ 4817 anzi = *(nextai[k]+1) - *nextai[k]; 4818 aj = buf_rj[k] + *nextai[k]; 4819 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4820 bnzi += nlnk; 4821 nextrow[k]++; nextai[k]++; 4822 } 4823 } 4824 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4825 4826 /* if free space is not available, make more free space */ 4827 if (current_space->local_remaining<bnzi) { 4828 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4829 nspacedouble++; 4830 } 4831 /* copy data into free space, then initialize lnk */ 4832 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4833 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4834 4835 current_space->array += bnzi; 4836 current_space->local_used += bnzi; 4837 current_space->local_remaining -= bnzi; 4838 4839 bi[i+1] = bi[i] + bnzi; 4840 } 4841 4842 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 
4843 4844 ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr); 4845 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4846 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4847 4848 /* create symbolic parallel matrix B_mpi */ 4849 /*---------------------------------------*/ 4850 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4851 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4852 if (n==PETSC_DECIDE) { 4853 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4854 } else { 4855 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4856 } 4857 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4858 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4859 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4860 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4861 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4862 4863 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4864 B_mpi->assembled = PETSC_FALSE; 4865 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4866 merge->bi = bi; 4867 merge->bj = bj; 4868 merge->buf_ri = buf_ri; 4869 merge->buf_rj = buf_rj; 4870 merge->coi = NULL; 4871 merge->coj = NULL; 4872 merge->owners_co = NULL; 4873 4874 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4875 4876 /* attach the supporting struct to B_mpi for reuse */ 4877 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4878 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4879 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4880 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4881 *mpimat = B_mpi; 4882 4883 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4884 PetscFunctionReturn(0); 4885 } 4886 4887 #undef __FUNCT__ 4888 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4889 /*@C 4890 MatCreateMPIAIJSumSeqAIJ - 
Creates a MPIAIJ matrix by adding sequential 4891 matrices from each processor 4892 4893 Collective on MPI_Comm 4894 4895 Input Parameters: 4896 + comm - the communicators the parallel matrix will live on 4897 . seqmat - the input sequential matrices 4898 . m - number of local rows (or PETSC_DECIDE) 4899 . n - number of local columns (or PETSC_DECIDE) 4900 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4901 4902 Output Parameter: 4903 . mpimat - the parallel matrix generated 4904 4905 Level: advanced 4906 4907 Notes: 4908 The dimensions of the sequential matrix in each processor MUST be the same. 4909 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4910 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4911 @*/ 4912 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4913 { 4914 PetscErrorCode ierr; 4915 PetscMPIInt size; 4916 4917 PetscFunctionBegin; 4918 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4919 if (size == 1) { 4920 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4921 if (scall == MAT_INITIAL_MATRIX) { 4922 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4923 } else { 4924 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4925 } 4926 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4927 PetscFunctionReturn(0); 4928 } 4929 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4930 if (scall == MAT_INITIAL_MATRIX) { 4931 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4932 } 4933 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4934 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4935 PetscFunctionReturn(0); 4936 } 4937 4938 #undef __FUNCT__ 4939 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4940 /*@ 4941 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by 
taking all its local rows and putting them into a sequential vector with 4942 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4943 with MatGetSize() 4944 4945 Not Collective 4946 4947 Input Parameters: 4948 + A - the matrix 4949 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4950 4951 Output Parameter: 4952 . A_loc - the local sequential matrix generated 4953 4954 Level: developer 4955 4956 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4957 4958 @*/ 4959 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4960 { 4961 PetscErrorCode ierr; 4962 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4963 Mat_SeqAIJ *mat,*a,*b; 4964 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4965 MatScalar *aa,*ba,*cam; 4966 PetscScalar *ca; 4967 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4968 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4969 PetscBool match; 4970 4971 PetscFunctionBegin; 4972 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4973 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4974 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4975 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4976 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4977 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4978 aa = a->a; ba = b->a; 4979 if (scall == MAT_INITIAL_MATRIX) { 4980 ierr = PetscMalloc1((1+am),&ci);CHKERRQ(ierr); 4981 ci[0] = 0; 4982 for (i=0; i<am; i++) { 4983 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4984 } 4985 ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr); 4986 ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr); 4987 k = 0; 4988 for (i=0; i<am; i++) { 4989 ncols_o = bi[i+1] - bi[i]; 4990 ncols_d = ai[i+1] - ai[i]; 4991 /* off-diagonal portion of A */ 4992 for (jo=0; jo<ncols_o; jo++) { 4993 col = cmap[*bj]; 4994 if (col >= cstart) break; 4995 cj[k] = col; bj++; 
4996 ca[k++] = *ba++; 4997 } 4998 /* diagonal portion of A */ 4999 for (j=0; j<ncols_d; j++) { 5000 cj[k] = cstart + *aj++; 5001 ca[k++] = *aa++; 5002 } 5003 /* off-diagonal portion of A */ 5004 for (j=jo; j<ncols_o; j++) { 5005 cj[k] = cmap[*bj++]; 5006 ca[k++] = *ba++; 5007 } 5008 } 5009 /* put together the new matrix */ 5010 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5011 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5012 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5013 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5014 mat->free_a = PETSC_TRUE; 5015 mat->free_ij = PETSC_TRUE; 5016 mat->nonew = 0; 5017 } else if (scall == MAT_REUSE_MATRIX) { 5018 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5019 ci = mat->i; cj = mat->j; cam = mat->a; 5020 for (i=0; i<am; i++) { 5021 /* off-diagonal portion of A */ 5022 ncols_o = bi[i+1] - bi[i]; 5023 for (jo=0; jo<ncols_o; jo++) { 5024 col = cmap[*bj]; 5025 if (col >= cstart) break; 5026 *cam++ = *ba++; bj++; 5027 } 5028 /* diagonal portion of A */ 5029 ncols_d = ai[i+1] - ai[i]; 5030 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5031 /* off-diagonal portion of A */ 5032 for (j=jo; j<ncols_o; j++) { 5033 *cam++ = *ba++; bj++; 5034 } 5035 } 5036 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5037 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5038 PetscFunctionReturn(0); 5039 } 5040 5041 #undef __FUNCT__ 5042 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5043 /*@C 5044 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5045 5046 Not Collective 5047 5048 Input Parameters: 5049 + A - the matrix 5050 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5051 - row, col - index sets of rows and columns to extract (or NULL) 5052 5053 Output Parameter: 5054 . 
A_loc - the local sequential matrix generated 5055 5056 Level: developer 5057 5058 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5059 5060 @*/ 5061 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5062 { 5063 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5064 PetscErrorCode ierr; 5065 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5066 IS isrowa,iscola; 5067 Mat *aloc; 5068 PetscBool match; 5069 5070 PetscFunctionBegin; 5071 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5072 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5073 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5074 if (!row) { 5075 start = A->rmap->rstart; end = A->rmap->rend; 5076 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5077 } else { 5078 isrowa = *row; 5079 } 5080 if (!col) { 5081 start = A->cmap->rstart; 5082 cmap = a->garray; 5083 nzA = a->A->cmap->n; 5084 nzB = a->B->cmap->n; 5085 ierr = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr); 5086 ncols = 0; 5087 for (i=0; i<nzB; i++) { 5088 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5089 else break; 5090 } 5091 imark = i; 5092 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5093 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5094 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5095 } else { 5096 iscola = *col; 5097 } 5098 if (scall != MAT_INITIAL_MATRIX) { 5099 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5100 aloc[0] = *A_loc; 5101 } 5102 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5103 *A_loc = aloc[0]; 5104 ierr = PetscFree(aloc);CHKERRQ(ierr); 5105 if (!row) { 5106 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5107 } 5108 if (!col) { 5109 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5110 } 5111 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5112 
PetscFunctionReturn(0); 5113 } 5114 5115 #undef __FUNCT__ 5116 #define __FUNCT__ "MatGetBrowsOfAcols" 5117 /*@C 5118 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5119 5120 Collective on Mat 5121 5122 Input Parameters: 5123 + A,B - the matrices in mpiaij format 5124 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5125 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5126 5127 Output Parameter: 5128 + rowb, colb - index sets of rows and columns of B to extract 5129 - B_seq - the sequential matrix generated 5130 5131 Level: developer 5132 5133 @*/ 5134 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5135 { 5136 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5137 PetscErrorCode ierr; 5138 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5139 IS isrowb,iscolb; 5140 Mat *bseq=NULL; 5141 5142 PetscFunctionBegin; 5143 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5144 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5145 } 5146 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5147 5148 if (scall == MAT_INITIAL_MATRIX) { 5149 start = A->cmap->rstart; 5150 cmap = a->garray; 5151 nzA = a->A->cmap->n; 5152 nzB = a->B->cmap->n; 5153 ierr = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr); 5154 ncols = 0; 5155 for (i=0; i<nzB; i++) { /* row < local row index */ 5156 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5157 else break; 5158 } 5159 imark = i; 5160 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5161 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5162 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5163 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5164 } else { 5165 
if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5166 isrowb = *rowb; iscolb = *colb; 5167 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5168 bseq[0] = *B_seq; 5169 } 5170 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5171 *B_seq = bseq[0]; 5172 ierr = PetscFree(bseq);CHKERRQ(ierr); 5173 if (!rowb) { 5174 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5175 } else { 5176 *rowb = isrowb; 5177 } 5178 if (!colb) { 5179 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5180 } else { 5181 *colb = iscolb; 5182 } 5183 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5184 PetscFunctionReturn(0); 5185 } 5186 5187 #undef __FUNCT__ 5188 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5189 /* 5190 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5191 of the OFF-DIAGONAL portion of local A 5192 5193 Collective on Mat 5194 5195 Input Parameters: 5196 + A,B - the matrices in mpiaij format 5197 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5198 5199 Output Parameter: 5200 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5201 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5202 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5203 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5204 5205 Level: developer 5206 5207 */ 5208 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5209 { 5210 VecScatter_MPI_General *gen_to,*gen_from; 5211 PetscErrorCode ierr; 5212 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5213 Mat_SeqAIJ *b_oth; 5214 VecScatter ctx =a->Mvctx; 5215 MPI_Comm comm; 5216 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5217 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5218 PetscScalar *rvalues,*svalues; 5219 MatScalar *b_otha,*bufa,*bufA; 5220 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5221 MPI_Request *rwaits = NULL,*swaits = NULL; 5222 MPI_Status *sstatus,rstatus; 5223 PetscMPIInt jj; 5224 PetscInt *cols,sbs,rbs; 5225 PetscScalar *vals; 5226 5227 PetscFunctionBegin; 5228 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5229 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5230 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5231 } 5232 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5233 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5234 5235 gen_to = (VecScatter_MPI_General*)ctx->todata; 5236 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5237 rvalues = gen_from->values; /* holds the length of receiving row */ 5238 svalues = gen_to->values; /* holds the length of sending row */ 5239 nrecvs = gen_from->n; 5240 nsends = gen_to->n; 5241 5242 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5243 srow = gen_to->indices; /* local row index to be sent */ 5244 sstarts = gen_to->starts; 5245 sprocs = 
gen_to->procs; 5246 sstatus = gen_to->sstatus; 5247 sbs = gen_to->bs; 5248 rstarts = gen_from->starts; 5249 rprocs = gen_from->procs; 5250 rbs = gen_from->bs; 5251 5252 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5253 if (scall == MAT_INITIAL_MATRIX) { 5254 /* i-array */ 5255 /*---------*/ 5256 /* post receives */ 5257 for (i=0; i<nrecvs; i++) { 5258 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5259 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5260 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5261 } 5262 5263 /* pack the outgoing message */ 5264 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5265 5266 sstartsj[0] = 0; 5267 rstartsj[0] = 0; 5268 len = 0; /* total length of j or a array to be sent */ 5269 k = 0; 5270 for (i=0; i<nsends; i++) { 5271 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5272 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5273 for (j=0; j<nrows; j++) { 5274 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5275 for (l=0; l<sbs; l++) { 5276 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5277 5278 rowlen[j*sbs+l] = ncols; 5279 5280 len += ncols; 5281 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5282 } 5283 k++; 5284 } 5285 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5286 5287 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5288 } 5289 /* recvs and sends of i-array are completed */ 5290 i = nrecvs; 5291 while (i--) { 5292 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5293 } 5294 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5295 5296 /* allocate buffers for sending j and a arrays */ 5297 ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr); 5298 ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr); 5299 5300 /* create i-array of B_oth */ 5301 ierr = 
PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr); 5302 5303 b_othi[0] = 0; 5304 len = 0; /* total length of j or a array to be received */ 5305 k = 0; 5306 for (i=0; i<nrecvs; i++) { 5307 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5308 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5309 for (j=0; j<nrows; j++) { 5310 b_othi[k+1] = b_othi[k] + rowlen[j]; 5311 len += rowlen[j]; k++; 5312 } 5313 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5314 } 5315 5316 /* allocate space for j and a arrrays of B_oth */ 5317 ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr); 5318 ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr); 5319 5320 /* j-array */ 5321 /*---------*/ 5322 /* post receives of j-array */ 5323 for (i=0; i<nrecvs; i++) { 5324 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5325 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5326 } 5327 5328 /* pack the outgoing message j-array */ 5329 k = 0; 5330 for (i=0; i<nsends; i++) { 5331 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5332 bufJ = bufj+sstartsj[i]; 5333 for (j=0; j<nrows; j++) { 5334 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5335 for (ll=0; ll<sbs; ll++) { 5336 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5337 for (l=0; l<ncols; l++) { 5338 *bufJ++ = cols[l]; 5339 } 5340 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5341 } 5342 } 5343 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5344 } 5345 5346 /* recvs and sends of j-array are completed */ 5347 i = nrecvs; 5348 while (i--) { 5349 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5350 } 5351 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5352 } else if (scall == MAT_REUSE_MATRIX) { 5353 sstartsj = *startsj_s; 5354 rstartsj = *startsj_r; 
5355 bufa = *bufa_ptr; 5356 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5357 b_otha = b_oth->a; 5358 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5359 5360 /* a-array */ 5361 /*---------*/ 5362 /* post receives of a-array */ 5363 for (i=0; i<nrecvs; i++) { 5364 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5365 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5366 } 5367 5368 /* pack the outgoing message a-array */ 5369 k = 0; 5370 for (i=0; i<nsends; i++) { 5371 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5372 bufA = bufa+sstartsj[i]; 5373 for (j=0; j<nrows; j++) { 5374 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5375 for (ll=0; ll<sbs; ll++) { 5376 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5377 for (l=0; l<ncols; l++) { 5378 *bufA++ = vals[l]; 5379 } 5380 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5381 } 5382 } 5383 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5384 } 5385 /* recvs and sends of a-array are completed */ 5386 i = nrecvs; 5387 while (i--) { 5388 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5389 } 5390 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5391 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5392 5393 if (scall == MAT_INITIAL_MATRIX) { 5394 /* put together the new matrix */ 5395 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5396 5397 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5398 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5399 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5400 b_oth->free_a = PETSC_TRUE; 5401 b_oth->free_ij = PETSC_TRUE; 5402 b_oth->nonew = 0; 5403 5404 ierr = PetscFree(bufj);CHKERRQ(ierr); 5405 if (!startsj_s || !bufa_ptr) { 5406 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5407 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5408 } else { 5409 *startsj_s = sstartsj; 5410 *startsj_r = rstartsj; 5411 *bufa_ptr = bufa; 5412 } 5413 } 5414 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5415 PetscFunctionReturn(0); 5416 } 5417 5418 #undef __FUNCT__ 5419 #define __FUNCT__ "MatGetCommunicationStructs" 5420 /*@C 5421 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5422 5423 Not Collective 5424 5425 Input Parameters: 5426 . A - The matrix in mpiaij format 5427 5428 Output Parameter: 5429 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5430 . colmap - A map from global column index to local index into lvec 5431 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5432 5433 Level: developer 5434 5435 @*/ 5436 #if defined(PETSC_USE_CTABLE) 5437 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5438 #else 5439 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5440 #endif 5441 { 5442 Mat_MPIAIJ *a; 5443 5444 PetscFunctionBegin; 5445 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5446 PetscValidPointer(lvec, 2); 5447 PetscValidPointer(colmap, 3); 5448 PetscValidPointer(multScatter, 4); 5449 a = (Mat_MPIAIJ*) A->data; 5450 if (lvec) *lvec = a->lvec; 5451 if (colmap) *colmap = a->colmap; 5452 if (multScatter) *multScatter = a->Mvctx; 5453 PetscFunctionReturn(0); 5454 } 5455 5456 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5457 PETSC_EXTERN PetscErrorCode 
MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5458 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5459 5460 #undef __FUNCT__ 5461 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5462 /* 5463 Computes (B'*A')' since computing B*A directly is untenable 5464 5465 n p p 5466 ( ) ( ) ( ) 5467 m ( A ) * n ( B ) = m ( C ) 5468 ( ) ( ) ( ) 5469 5470 */ 5471 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5472 { 5473 PetscErrorCode ierr; 5474 Mat At,Bt,Ct; 5475 5476 PetscFunctionBegin; 5477 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5478 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5479 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5480 ierr = MatDestroy(&At);CHKERRQ(ierr); 5481 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5482 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5483 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5484 PetscFunctionReturn(0); 5485 } 5486 5487 #undef __FUNCT__ 5488 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5489 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5490 { 5491 PetscErrorCode ierr; 5492 PetscInt m=A->rmap->n,n=B->cmap->n; 5493 Mat Cmat; 5494 5495 PetscFunctionBegin; 5496 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5497 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5498 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5499 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5500 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5501 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5502 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5503 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5504 5505 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5506 5507 *C = Cmat; 5508 
PetscFunctionReturn(0); 5509 } 5510 5511 /* ----------------------------------------------------------------*/ 5512 #undef __FUNCT__ 5513 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5514 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5515 { 5516 PetscErrorCode ierr; 5517 5518 PetscFunctionBegin; 5519 if (scall == MAT_INITIAL_MATRIX) { 5520 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5521 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5522 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5523 } 5524 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5525 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5526 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5527 PetscFunctionReturn(0); 5528 } 5529 5530 #if defined(PETSC_HAVE_MUMPS) 5531 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5532 #endif 5533 #if defined(PETSC_HAVE_PASTIX) 5534 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5535 #endif 5536 #if defined(PETSC_HAVE_SUPERLU_DIST) 5537 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5538 #endif 5539 #if defined(PETSC_HAVE_CLIQUE) 5540 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5541 #endif 5542 5543 /*MC 5544 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5545 5546 Options Database Keys: 5547 . 
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5548 5549 Level: beginner 5550 5551 .seealso: MatCreateAIJ() 5552 M*/ 5553 5554 #undef __FUNCT__ 5555 #define __FUNCT__ "MatCreate_MPIAIJ" 5556 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5557 { 5558 Mat_MPIAIJ *b; 5559 PetscErrorCode ierr; 5560 PetscMPIInt size; 5561 5562 PetscFunctionBegin; 5563 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5564 5565 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5566 B->data = (void*)b; 5567 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5568 B->assembled = PETSC_FALSE; 5569 B->insertmode = NOT_SET_VALUES; 5570 b->size = size; 5571 5572 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5573 5574 /* build cache for off array entries formed */ 5575 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5576 5577 b->donotstash = PETSC_FALSE; 5578 b->colmap = 0; 5579 b->garray = 0; 5580 b->roworiented = PETSC_TRUE; 5581 5582 /* stuff used for matrix vector multiply */ 5583 b->lvec = NULL; 5584 b->Mvctx = NULL; 5585 5586 /* stuff for MatGetRow() */ 5587 b->rowindices = 0; 5588 b->rowvalues = 0; 5589 b->getrowactive = PETSC_FALSE; 5590 5591 /* flexible pointer used in CUSP/CUSPARSE classes */ 5592 b->spptr = NULL; 5593 5594 #if defined(PETSC_HAVE_MUMPS) 5595 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr); 5596 #endif 5597 #if defined(PETSC_HAVE_PASTIX) 5598 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5599 #endif 5600 #if defined(PETSC_HAVE_SUPERLU_DIST) 5601 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5602 #endif 5603 #if defined(PETSC_HAVE_CLIQUE) 5604 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr); 5605 #endif 5606 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5607 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5608 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5609 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5610 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5611 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5612 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5613 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5614 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5615 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5616 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5617 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5618 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5619 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5620 PetscFunctionReturn(0); 5621 } 5622 5623 #undef __FUNCT__ 5624 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5625 /*@ 5626 
MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5627 and "off-diagonal" part of the matrix in CSR format. 5628 5629 Collective on MPI_Comm 5630 5631 Input Parameters: 5632 + comm - MPI communicator 5633 . m - number of local rows (Cannot be PETSC_DECIDE) 5634 . n - This value should be the same as the local size used in creating the 5635 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5636 calculated if N is given) For square matrices n is almost always m. 5637 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5638 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5639 . i - row indices for "diagonal" portion of matrix 5640 . j - column indices 5641 . a - matrix values 5642 . oi - row indices for "off-diagonal" portion of matrix 5643 . oj - column indices 5644 - oa - matrix values 5645 5646 Output Parameter: 5647 . mat - the matrix 5648 5649 Level: advanced 5650 5651 Notes: 5652 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5653 must free the arrays once the matrix has been destroyed and not before. 5654 5655 The i and j indices are 0 based 5656 5657 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5658 5659 This sets local rows and cannot be used to set off-processor values. 5660 5661 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5662 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5663 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5664 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5665 keep track of the underlying array. 
Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5666 communication if it is known that only local entries will be set. 5667 5668 .keywords: matrix, aij, compressed row, sparse, parallel 5669 5670 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5671 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5672 @*/ 5673 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5674 { 5675 PetscErrorCode ierr; 5676 Mat_MPIAIJ *maij; 5677 5678 PetscFunctionBegin; 5679 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5680 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5681 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5682 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5683 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5684 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5685 maij = (Mat_MPIAIJ*) (*mat)->data; 5686 5687 (*mat)->preallocated = PETSC_TRUE; 5688 5689 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5690 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5691 5692 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5693 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5694 5695 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5696 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5697 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5698 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5699 5700 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5701 ierr = 
MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5702 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5703 PetscFunctionReturn(0); 5704 } 5705 5706 /* 5707 Special version for direct calls from Fortran 5708 */ 5709 #include <petsc-private/fortranimpl.h> 5710 5711 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5712 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5713 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5714 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5715 #endif 5716 5717 /* Change these macros so can be used in void function */ 5718 #undef CHKERRQ 5719 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5720 #undef SETERRQ2 5721 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5722 #undef SETERRQ3 5723 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5724 #undef SETERRQ 5725 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5726 5727 #undef __FUNCT__ 5728 #define __FUNCT__ "matsetvaluesmpiaij_" 5729 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5730 { 5731 Mat mat = *mmat; 5732 PetscInt m = *mm, n = *mn; 5733 InsertMode addv = *maddv; 5734 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5735 PetscScalar value; 5736 PetscErrorCode ierr; 5737 5738 MatCheckPreallocated(mat,1); 5739 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5740 5741 #if defined(PETSC_USE_DEBUG) 5742 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5743 #endif 5744 { 5745 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5746 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5747 PetscBool roworiented = aij->roworiented; 5748 5749 /* Some Variables required in the macro */ 5750 Mat A = aij->A; 5751 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5752 PetscInt *aimax = a->imax,*ai = 
a->i,*ailen = a->ilen,*aj = a->j; 5753 MatScalar *aa = a->a; 5754 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5755 Mat B = aij->B; 5756 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5757 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5758 MatScalar *ba = b->a; 5759 5760 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5761 PetscInt nonew = a->nonew; 5762 MatScalar *ap1,*ap2; 5763 5764 PetscFunctionBegin; 5765 for (i=0; i<m; i++) { 5766 if (im[i] < 0) continue; 5767 #if defined(PETSC_USE_DEBUG) 5768 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5769 #endif 5770 if (im[i] >= rstart && im[i] < rend) { 5771 row = im[i] - rstart; 5772 lastcol1 = -1; 5773 rp1 = aj + ai[row]; 5774 ap1 = aa + ai[row]; 5775 rmax1 = aimax[row]; 5776 nrow1 = ailen[row]; 5777 low1 = 0; 5778 high1 = nrow1; 5779 lastcol2 = -1; 5780 rp2 = bj + bi[row]; 5781 ap2 = ba + bi[row]; 5782 rmax2 = bimax[row]; 5783 nrow2 = bilen[row]; 5784 low2 = 0; 5785 high2 = nrow2; 5786 5787 for (j=0; j<n; j++) { 5788 if (roworiented) value = v[i*n+j]; 5789 else value = v[i+j*m]; 5790 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5791 if (in[j] >= cstart && in[j] < cend) { 5792 col = in[j] - cstart; 5793 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5794 } else if (in[j] < 0) continue; 5795 #if defined(PETSC_USE_DEBUG) 5796 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5797 #endif 5798 else { 5799 if (mat->was_assembled) { 5800 if (!aij->colmap) { 5801 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5802 } 5803 #if defined(PETSC_USE_CTABLE) 5804 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5805 col--; 5806 #else 5807 col = 
aij->colmap[in[j]] - 1; 5808 #endif 5809 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5810 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5811 col = in[j]; 5812 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5813 B = aij->B; 5814 b = (Mat_SeqAIJ*)B->data; 5815 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5816 rp2 = bj + bi[row]; 5817 ap2 = ba + bi[row]; 5818 rmax2 = bimax[row]; 5819 nrow2 = bilen[row]; 5820 low2 = 0; 5821 high2 = nrow2; 5822 bm = aij->B->rmap->n; 5823 ba = b->a; 5824 } 5825 } else col = in[j]; 5826 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5827 } 5828 } 5829 } else if (!aij->donotstash) { 5830 if (roworiented) { 5831 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5832 } else { 5833 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5834 } 5835 } 5836 } 5837 } 5838 PetscFunctionReturnVoid(); 5839 } 5840 5841