1 2 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 3 #include <petsc-private/vecimpl.h> 4 #include <petscblaslapack.h> 5 #include <petscsf.h> 6 7 /*MC 8 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 9 10 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 11 and MATMPIAIJ otherwise. As a result, for single process communicators, 12 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 13 for communicators controlling multiple processes. It is recommended that you call both of 14 the above preallocation routines for simplicity. 15 16 Options Database Keys: 17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 18 19 Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when 20 enough exist. 21 22 Level: beginner 23 24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ 25 M*/ 26 27 /*MC 28 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 29 30 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 31 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 32 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 33 for communicators controlling multiple processes. It is recommended that you call both of 34 the above preallocation routines for simplicity. 35 36 Options Database Keys: 37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 38 39 Level: beginner 40 41 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 42 M*/ 43 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ" 46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 47 { 48 PetscErrorCode ierr; 49 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 50 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 51 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 52 const PetscInt *ia,*ib; 53 const MatScalar *aa,*bb; 54 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 55 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 56 57 PetscFunctionBegin; 58 *keptrows = 0; 59 ia = a->i; 60 ib = b->i; 61 for (i=0; i<m; i++) { 62 na = ia[i+1] - ia[i]; 63 nb = ib[i+1] - ib[i]; 64 if (!na && !nb) { 65 cnt++; 66 goto ok1; 67 } 68 aa = a->a + ia[i]; 69 for (j=0; j<na; j++) { 70 if (aa[j] != 0.0) goto ok1; 71 } 72 bb = b->a + ib[i]; 73 for (j=0; j <nb; j++) { 74 if (bb[j] != 0.0) goto ok1; 75 } 76 cnt++; 77 ok1:; 78 } 79 ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 80 if (!n0rows) PetscFunctionReturn(0); 81 ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr); 82 cnt = 0; 83 for (i=0; i<m; i++) { 84 na = ia[i+1] - ia[i]; 85 nb = ib[i+1] - ib[i]; 86 if (!na && !nb) continue; 87 aa = a->a + ia[i]; 88 for (j=0; j<na;j++) { 89 if (aa[j] != 0.0) { 90 rows[cnt++] = rstart + i; 91 goto ok2; 92 } 93 } 94 bb = b->a + ib[i]; 95 for (j=0; j<nb; j++) { 96 if (bb[j] != 0.0) { 97 rows[cnt++] = rstart + i; 98 goto ok2; 99 } 100 } 101 ok2:; 102 } 103 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 104 PetscFunctionReturn(0); 105 } 106 107 #undef __FUNCT__ 108 #define __FUNCT__ "MatDiagonalSet_MPIAIJ" 109 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 110 { 111 PetscErrorCode ierr; 112 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 113 114 PetscFunctionBegin; 115 if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) { 116 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 117 } else { 118 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 119 } 120 PetscFunctionReturn(0); 121 } 122 123 124 #undef __FUNCT__ 125 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ" 126 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 127 { 128 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 129 PetscErrorCode ierr; 130 PetscInt i,rstart,nrows,*rows; 131 132 PetscFunctionBegin; 133 *zrows = NULL; 134 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 135 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 136 for (i=0; i<nrows; i++) rows[i] += rstart; 137 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 138 PetscFunctionReturn(0); 139 } 140 141 #undef __FUNCT__ 142 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 143 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 144 { 145 PetscErrorCode ierr; 146 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 147 PetscInt i,n,*garray = aij->garray; 148 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 149 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 150 PetscReal *work; 151 152 PetscFunctionBegin; 153 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 154 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 155 if (type == NORM_2) { 156 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 157 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 158 } 159 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 160 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 161 } 162 } else if (type == NORM_1) { 163 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 164 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 165 } 166 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 167 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 168 } 169 } else if (type == NORM_INFINITY) { 170 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 171 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 172 } 173 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 174 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 175 } 176 177 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 178 if (type == NORM_INFINITY) { 179 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 180 } else { 181 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 182 } 183 ierr = PetscFree(work);CHKERRQ(ierr); 184 if (type == NORM_2) { 185 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 186 } 187 PetscFunctionReturn(0); 188 } 189 190 #undef __FUNCT__ 191 #define __FUNCT__ "MatDistribute_MPIAIJ" 192 /* 193 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 194 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 195 196 Only for square matrices 197 198 Used by a preconditioner, hence PETSC_EXTERN 199 */ 200 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 201 { 202 PetscMPIInt rank,size; 203 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 204 PetscErrorCode ierr; 205 Mat mat; 206 Mat_SeqAIJ *gmata; 207 PetscMPIInt tag; 208 MPI_Status status; 209 PetscBool aij; 210 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 211 212 PetscFunctionBegin; 213 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 214 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 215 if (!rank) { 216 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 217 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 218 } 219 if (reuse == MAT_INITIAL_MATRIX) { 220 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 221 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 222 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 223 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 224 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 225 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 226 ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr); 227 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 228 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 229 230 rowners[0] = 0; 231 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 232 rstart = rowners[rank]; 233 rend = rowners[rank+1]; 234 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 235 if (!rank) { 236 gmata = (Mat_SeqAIJ*) gmat->data; 237 /* send row lengths to all processors */ 238 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 239 for (i=1; i<size; i++) { 240 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 241 } 242 /* determine number diagonal and off-diagonal counts */ 243 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 244 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 245 jj = 0; 246 for (i=0; i<m; i++) { 247 for (j=0; j<dlens[i]; j++) { 248 if (gmata->j[jj] < rstart) ld[i]++; 249 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 250 jj++; 251 } 252 } 253 /* send column indices to other processes */ 254 for (i=1; i<size; i++) { 255 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 256 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 257 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 258 } 259 260 /* send numerical values to other processes */ 261 for (i=1; i<size; i++) { 262 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 263 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 264 } 265 gmataa = gmata->a; 266 gmataj = gmata->j; 267 268 } else { 269 /* receive row lengths */ 270 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 271 /* receive column indices */ 272 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 273 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 274 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 275 /* determine number diagonal and off-diagonal counts */ 276 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 277 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 278 jj = 0; 279 for (i=0; i<m; i++) { 280 for (j=0; j<dlens[i]; j++) { 281 if (gmataj[jj] < rstart) ld[i]++; 282 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 283 jj++; 284 } 285 } 286 /* receive numerical values */ 287 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 288 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 289 } 290 /* set preallocation */ 291 for (i=0; i<m; i++) { 292 dlens[i] -= olens[i]; 293 } 294 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 295 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 296 297 for (i=0; i<m; i++) { 298 dlens[i] += olens[i]; 299 } 300 cnt = 0; 301 for (i=0; i<m; i++) { 302 row = rstart + i; 303 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 304 cnt += dlens[i]; 305 } 306 if (rank) { 307 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 308 } 309 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 310 ierr = PetscFree(rowners);CHKERRQ(ierr); 311 312 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 313 314 *inmat = mat; 315 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 316 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 317 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 318 mat = *inmat; 319 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 320 if (!rank) { 321 /* send numerical values to other processes */ 322 gmata = (Mat_SeqAIJ*) gmat->data; 323 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 324 gmataa = gmata->a; 325 for (i=1; i<size; i++) { 326 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 327 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 328 } 329 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 330 } else { 331 /* receive numerical values from process 0*/ 332 nz = Ad->nz + Ao->nz; 333 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 334 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 335 } 336 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 337 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 338 ad = Ad->a; 339 ao = Ao->a; 340 if (mat->rmap->n) { 341 i = 0; 342 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 343 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 344 } 345 for (i=1; i<mat->rmap->n; i++) { 346 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 347 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 348 } 349 i--; 350 if (mat->rmap->n) { 351 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 352 } 353 if (rank) { 354 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 355 } 356 } 357 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 358 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 359 PetscFunctionReturn(0); 360 } 361 362 /* 363 Local utility routine that creates a mapping from the global column 364 number to the local number in the off-diagonal part of the local 365 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 366 a slightly higher hash table cost; without it it is not scalable (each processor 367 has an order N integer array but is fast to acess. 368 */ 369 #undef __FUNCT__ 370 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 371 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 372 { 373 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 374 PetscErrorCode ierr; 375 PetscInt n = aij->B->cmap->n,i; 376 377 PetscFunctionBegin; 378 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 379 #if defined(PETSC_USE_CTABLE) 380 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 381 for (i=0; i<n; i++) { 382 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 383 } 384 #else 385 ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr); 386 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 387 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 388 #endif 389 PetscFunctionReturn(0); 390 } 391 392 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 393 { \ 394 if (col <= lastcol1) low1 = 0; \ 395 else high1 = nrow1; \ 396 lastcol1 = col;\ 397 while (high1-low1 > 5) { \ 398 t = (low1+high1)/2; \ 399 if (rp1[t] > col) high1 = t; \ 400 else low1 = t; \ 401 } \ 402 for (_i=low1; _i<high1; _i++) { \ 403 if (rp1[_i] > col) break; \ 404 if (rp1[_i] == col) { \ 405 if (addv == ADD_VALUES) ap1[_i] += value; \ 406 else ap1[_i] = value; \ 407 goto a_noinsert; \ 408 } \ 409 } \ 410 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 411 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 412 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 413 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 414 N = nrow1++ - 1; a->nz++; high1++; \ 415 /* shift up all the later entries in this row */ \ 416 for (ii=N; ii>=_i; ii--) { \ 417 rp1[ii+1] = rp1[ii]; \ 418 ap1[ii+1] = ap1[ii]; \ 419 } \ 420 rp1[_i] = col; \ 421 ap1[_i] = value; \ 422 A->nonzerostate++;\ 423 a_noinsert: ; \ 424 ailen[row] = nrow1; \ 425 } 426 427 428 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 429 { \ 430 if (col <= lastcol2) low2 = 0; \ 431 else high2 = nrow2; \ 432 lastcol2 = col; \ 433 while (high2-low2 > 5) { \ 434 t = (low2+high2)/2; \ 435 if (rp2[t] > col) high2 = t; \ 436 else low2 = t; \ 437 } \ 438 for (_i=low2; _i<high2; _i++) { \ 439 if (rp2[_i] > col) break; \ 440 if (rp2[_i] == col) { \ 441 if (addv == ADD_VALUES) ap2[_i] += value; \ 442 else ap2[_i] = value; \ 443 goto b_noinsert; \ 444 } \ 445 } \ 446 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 447 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 448 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 449 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 450 N = nrow2++ - 1; b->nz++; high2++; \ 451 /* shift up all the later entries in this row */ \ 452 for (ii=N; ii>=_i; ii--) { \ 453 rp2[ii+1] = rp2[ii]; \ 454 ap2[ii+1] = ap2[ii]; \ 455 } \ 456 rp2[_i] = col; \ 457 ap2[_i] = value; \ 458 B->nonzerostate++; \ 459 b_noinsert: ; \ 460 bilen[row] = nrow2; \ 461 } 462 463 #undef __FUNCT__ 464 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 465 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 466 { 467 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 468 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 469 PetscErrorCode ierr; 470 PetscInt l,*garray = mat->garray,diag; 471 472 PetscFunctionBegin; 473 /* code only works for square matrices A */ 474 475 /* find size of row to the left of the diagonal part */ 476 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 477 row = row - diag; 478 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 479 if (garray[b->j[b->i[row]+l]] > diag) break; 480 } 481 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 482 483 /* diagonal part */ 484 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 485 486 /* right of diagonal part */ 487 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 488 PetscFunctionReturn(0); 489 } 490 491 #undef __FUNCT__ 492 #define __FUNCT__ "MatSetValues_MPIAIJ" 493 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 494 { 495 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 496 PetscScalar value; 497 PetscErrorCode ierr; 498 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 499 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 500 PetscBool roworiented = aij->roworiented; 501 502 /* Some Variables required in the macro */ 503 Mat A = aij->A; 504 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 505 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 506 MatScalar *aa = a->a; 507 PetscBool ignorezeroentries = a->ignorezeroentries; 508 Mat B = aij->B; 509 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 510 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 511 MatScalar *ba = b->a; 512 513 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 514 PetscInt nonew; 515 MatScalar *ap1,*ap2; 516 517 PetscFunctionBegin; 518 for (i=0; i<m; i++) { 519 if (im[i] < 0) continue; 520 #if defined(PETSC_USE_DEBUG) 521 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 522 #endif 523 if (im[i] >= rstart && im[i] < rend) { 524 row = im[i] - rstart; 525 lastcol1 = -1; 526 rp1 = aj + ai[row]; 527 ap1 = aa + ai[row]; 528 rmax1 = aimax[row]; 529 nrow1 = ailen[row]; 530 low1 = 0; 531 high1 = nrow1; 532 lastcol2 = -1; 533 rp2 = bj + bi[row]; 534 ap2 = ba + bi[row]; 535 rmax2 = bimax[row]; 536 nrow2 = bilen[row]; 537 low2 = 0; 538 high2 = nrow2; 539 540 for (j=0; j<n; j++) { 541 if (roworiented) value = v[i*n+j]; 542 else value = v[i+j*m]; 543 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 544 if (in[j] >= cstart && in[j] < cend) { 545 col = in[j] - cstart; 546 nonew = a->nonew; 547 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 548 } else if (in[j] < 0) continue; 549 #if defined(PETSC_USE_DEBUG) 550 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 551 #endif 552 else { 553 if (mat->was_assembled) { 554 if (!aij->colmap) { 555 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 556 } 557 #if defined(PETSC_USE_CTABLE) 558 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 559 col--; 560 #else 561 col = aij->colmap[in[j]] - 1; 562 #endif 563 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 564 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 565 col = in[j]; 566 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 567 B = aij->B; 568 b = (Mat_SeqAIJ*)B->data; 569 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 570 rp2 = bj + bi[row]; 571 ap2 = ba + bi[row]; 572 rmax2 = bimax[row]; 573 nrow2 = bilen[row]; 574 low2 = 0; 575 high2 = nrow2; 576 bm = aij->B->rmap->n; 577 ba = b->a; 578 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 579 } else col = in[j]; 580 nonew = b->nonew; 581 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 582 } 583 } 584 } else { 585 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 586 if (!aij->donotstash) { 587 mat->assembled = PETSC_FALSE; 588 if (roworiented) { 589 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 590 } else { 591 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 592 } 593 } 594 } 595 } 596 PetscFunctionReturn(0); 597 } 598 599 #undef __FUNCT__ 600 #define __FUNCT__ "MatGetValues_MPIAIJ" 601 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 602 { 603 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 604 PetscErrorCode ierr; 605 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 606 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 607 608 PetscFunctionBegin; 609 for (i=0; i<m; i++) { 610 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 611 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 612 if (idxm[i] >= rstart && idxm[i] < rend) { 613 row = idxm[i] - rstart; 614 for (j=0; j<n; j++) { 615 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 616 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 617 if (idxn[j] >= cstart && idxn[j] < cend) { 618 col = idxn[j] - cstart; 619 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 620 } else { 621 if (!aij->colmap) { 622 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 623 } 624 #if defined(PETSC_USE_CTABLE) 625 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 626 col--; 627 #else 628 col = aij->colmap[idxn[j]] - 1; 629 #endif 630 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 631 else { 632 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 633 } 634 } 635 } 636 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 637 } 638 PetscFunctionReturn(0); 639 } 640 641 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 642 643 #undef __FUNCT__ 644 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 645 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 646 { 647 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 648 PetscErrorCode ierr; 649 PetscInt nstash,reallocs; 650 InsertMode addv; 651 652 PetscFunctionBegin; 653 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 654 655 /* make sure all processors are either in INSERTMODE or ADDMODE */ 656 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 657 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 658 mat->insertmode = addv; /* in case this processor had no cache */ 659 660 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 661 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 662 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 663 PetscFunctionReturn(0); 664 } 665 666 #undef __FUNCT__ 667 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 668 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 669 { 670 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 671 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 672 PetscErrorCode ierr; 673 PetscMPIInt n; 674 PetscInt i,j,rstart,ncols,flg; 675 PetscInt *row,*col; 676 PetscBool other_disassembled; 677 PetscScalar *val; 678 InsertMode addv = mat->insertmode; 679 680 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 681 682 PetscFunctionBegin; 683 if (!aij->donotstash && !mat->nooffprocentries) { 684 while (1) { 685 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 686 if (!flg) break; 687 688 for (i=0; i<n; ) { 689 /* Now identify the consecutive vals belonging to the same row */ 690 for (j=i,rstart=row[j]; j<n; j++) { 691 if (row[j] != rstart) break; 692 } 693 if (j < n) ncols = j-i; 694 else ncols = n-i; 695 /* Now assemble all these values with a single function call */ 696 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 697 698 i = j; 699 } 700 } 701 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 702 } 703 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 704 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 705 706 /* determine if any processor has disassembled, if so we must 707 also disassemble ourselfs, in order that we may reassemble. */ 708 /* 709 if nonzero structure of submatrix B cannot change then we know that 710 no processor disassembled thus we can skip this stuff 711 */ 712 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 713 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 714 if (mat->was_assembled && !other_disassembled) { 715 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 716 } 717 } 718 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 719 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 720 } 721 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 722 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 723 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 724 725 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 726 727 aij->rowvalues = 0; 728 729 /* used by MatAXPY() */ 730 a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 731 a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 732 733 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 734 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 735 736 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 737 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 738 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 739 ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 740 } 741 PetscFunctionReturn(0); 742 } 743 744 #undef __FUNCT__ 745 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 746 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 747 { 748 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 749 PetscErrorCode ierr; 750 751 PetscFunctionBegin; 752 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 753 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 754 PetscFunctionReturn(0); 755 } 756 757 #undef __FUNCT__ 758 #define __FUNCT__ "MatZeroRows_MPIAIJ" 759 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 760 { 761 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 762 PetscInt *owners = A->rmap->range; 763 PetscInt n = A->rmap->n; 764 PetscSF sf; 765 PetscInt *lrows; 766 PetscSFNode *rrows; 767 PetscInt r, p = 0, len = 0; 768 PetscErrorCode ierr; 769 770 PetscFunctionBegin; 771 /* Create SF where leaves are input rows and roots are owned rows */ 772 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 773 for (r = 0; r < n; ++r) lrows[r] = -1; 774 if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);} 775 for (r = 0; r < N; ++r) { 776 const PetscInt idx = rows[r]; 777 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 778 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 779 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 780 } 781 if (A->nooffproczerorows) { 782 if (p != mat->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,mat->rank); 783 lrows[len++] = idx - owners[p]; 784 } else { 785 rrows[r].rank = p; 786 rrows[r].index = rows[r] - owners[p]; 787 } 788 } 789 if (!A->nooffproczerorows) { 790 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 791 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 792 /* Collect flags for rows to be zeroed */ 793 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr); 794 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr); 795 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 796 /* Compress and put in row numbers */ 797 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 798 } 799 /* fix right hand side if needed */ 800 if (x && b) { 801 const PetscScalar *xx; 802 PetscScalar *bb; 803 804 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 805 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 806 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 807 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 808 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 809 } 810 /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/ 811 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 812 if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) { 813 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 814 } else if (diag != 0.0) { 815 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 816 if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 817 for (r = 0; r < len; ++r) { 818 const PetscInt row = lrows[r] + A->rmap->rstart; 819 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 820 } 821 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 822 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 823 } else { 824 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 825 } 826 ierr = PetscFree(lrows);CHKERRQ(ierr); 827 828 /* only change matrix nonzero state if pattern was allowed to be changed */ 829 if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) { 830 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 831 ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 832 } 833 PetscFunctionReturn(0); 834 } 835 836 #undef __FUNCT__ 837 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 838 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 839 { 840 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 841 PetscErrorCode ierr; 842 PetscMPIInt n = A->rmap->n; 843 PetscInt i,j,r,m,p = 0,len = 0; 844 PetscInt *lrows,*owners = A->rmap->range; 845 PetscSFNode *rrows; 846 PetscSF sf; 847 const PetscScalar *xx; 848 PetscScalar *bb,*mask; 849 Vec xmask,lmask; 850 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 851 const PetscInt *aj, *ii,*ridx; 852 PetscScalar *aa; 853 854 PetscFunctionBegin; 855 /* Create SF where leaves are input rows and roots are owned rows */ 856 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 857 for (r = 0; r < n; ++r) lrows[r] = -1; 858 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 859 for (r = 0; r < N; ++r) { 860 const PetscInt idx = rows[r]; 861 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 862 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 863 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 864 } 865 rrows[r].rank = p; 866 rrows[r].index = rows[r] - owners[p]; 867 } 868 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 869 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 870 /* Collect flags for rows to be zeroed */ 871 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 872 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 873 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 874 /* Compress and put in row numbers */ 875 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 876 /* zero diagonal part of matrix */ 877 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 878 /* handle off diagonal part of matrix */ 879 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 880 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 881 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 882 for (i=0; i<len; i++) bb[lrows[i]] = 1; 883 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 884 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 885 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 886 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 887 if (x) { 888 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 889 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 890 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 891 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 892 } 893 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 894 /* remove zeroed rows of off diagonal matrix */ 895 ii = aij->i; 896 for (i=0; i<len; i++) { 897 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 898 } 899 /* loop over all elements of off process part of matrix zeroing removed columns*/ 900 if (aij->compressedrow.use) { 901 m = aij->compressedrow.nrows; 902 ii = aij->compressedrow.i; 903 ridx = aij->compressedrow.rindex; 904 for (i=0; i<m; i++) { 905 n = ii[i+1] - ii[i]; 906 aj = aij->j + ii[i]; 907 aa = aij->a + ii[i]; 908 909 for (j=0; j<n; j++) { 910 if (PetscAbsScalar(mask[*aj])) { 911 if (b) bb[*ridx] -= *aa*xx[*aj]; 912 *aa = 0.0; 913 } 914 aa++; 915 aj++; 916 } 917 ridx++; 918 } 919 } else { /* do not use compressed row format */ 920 m = l->B->rmap->n; 921 for (i=0; i<m; i++) { 922 n = ii[i+1] - ii[i]; 923 aj = aij->j + ii[i]; 924 aa = aij->a + ii[i]; 925 for (j=0; j<n; j++) { 926 if (PetscAbsScalar(mask[*aj])) { 927 if (b) bb[i] -= *aa*xx[*aj]; 928 *aa = 0.0; 929 } 930 aa++; 931 aj++; 932 } 933 } 934 } 935 if (x) { 936 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 937 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 938 } 939 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 940 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 941 ierr = PetscFree(lrows);CHKERRQ(ierr); 942 943 /* only change matrix nonzero state if pattern was allowed to be changed */ 944 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 945 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 946 ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 947 } 948 PetscFunctionReturn(0); 949 } 950 951 #undef __FUNCT__ 952 #define __FUNCT__ "MatMult_MPIAIJ" 953 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 954 { 955 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 956 PetscErrorCode ierr; 957 PetscInt nt; 958 959 PetscFunctionBegin; 960 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 961 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 962 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 963 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 964 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 965 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 966 PetscFunctionReturn(0); 967 } 968 969 #undef __FUNCT__ 970 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 971 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 972 { 973 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 974 PetscErrorCode ierr; 975 976 PetscFunctionBegin; 977 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 978 PetscFunctionReturn(0); 979 } 980 981 #undef __FUNCT__ 982 #define __FUNCT__ "MatMultAdd_MPIAIJ" 983 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 984 { 985 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 986 PetscErrorCode ierr; 987 988 PetscFunctionBegin; 989 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 990 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 991 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 992 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 993 PetscFunctionReturn(0); 994 } 995 996 #undef __FUNCT__ 997 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 998 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 999 { 1000 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1001 PetscErrorCode ierr; 1002 PetscBool merged; 1003 1004 PetscFunctionBegin; 1005 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1006 /* do nondiagonal part */ 1007 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1008 if (!merged) { 1009 /* send it on its way */ 1010 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1011 /* do local part */ 1012 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1013 /* receive remote parts: note this assumes the values are not actually */ 1014 /* added in yy until the next line, */ 1015 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1016 } else { 1017 /* do local part */ 1018 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1019 /* send it on its way */ 1020 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1021 /* values actually were received in the Begin() but we need to call this nop */ 1022 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1023 } 1024 PetscFunctionReturn(0); 1025 } 1026 1027 #undef __FUNCT__ 1028 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1029 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1030 { 1031 MPI_Comm comm; 1032 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1033 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1034 IS Me,Notme; 1035 PetscErrorCode ierr; 1036 PetscInt M,N,first,last,*notme,i; 1037 PetscMPIInt size; 1038 1039 PetscFunctionBegin; 1040 /* Easy test: symmetric diagonal block */ 1041 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1042 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1043 if (!*f) PetscFunctionReturn(0); 1044 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1045 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1046 if (size == 1) PetscFunctionReturn(0); 1047 1048 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 1049 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1050 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1051 ierr = PetscMalloc1((N-last+first),¬me);CHKERRQ(ierr); 1052 for (i=0; i<first; i++) notme[i] = i; 1053 for (i=last; i<M; i++) notme[i-last+first] = i; 1054 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1055 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1056 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1057 Aoff = Aoffs[0]; 1058 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1059 Boff = Boffs[0]; 1060 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1061 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1062 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1063 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1064 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1065 ierr = PetscFree(notme);CHKERRQ(ierr); 1066 PetscFunctionReturn(0); 1067 } 1068 1069 #undef __FUNCT__ 1070 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1071 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1072 { 1073 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1074 PetscErrorCode ierr; 1075 1076 PetscFunctionBegin; 1077 /* do nondiagonal part */ 1078 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1079 /* send it on its way */ 1080 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1081 /* do local part */ 1082 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1083 /* receive remote parts */ 1084 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1085 PetscFunctionReturn(0); 1086 } 1087 1088 /* 1089 This only works correctly for square matrices where the subblock A->A is the 1090 diagonal block 1091 */ 1092 #undef __FUNCT__ 1093 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1094 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1095 { 1096 PetscErrorCode ierr; 1097 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1098 1099 PetscFunctionBegin; 1100 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1101 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1102 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1103 PetscFunctionReturn(0); 1104 } 1105 1106 #undef __FUNCT__ 1107 #define __FUNCT__ "MatScale_MPIAIJ" 1108 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1109 { 1110 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1111 PetscErrorCode ierr; 1112 1113 PetscFunctionBegin; 1114 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1115 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1116 PetscFunctionReturn(0); 1117 } 1118 1119 #undef __FUNCT__ 1120 #define __FUNCT__ "MatDestroy_MPIAIJ" 1121 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1122 { 1123 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1124 PetscErrorCode ierr; 1125 1126 PetscFunctionBegin; 1127 #if defined(PETSC_USE_LOG) 1128 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1129 #endif 1130 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1131 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1132 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1133 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1134 #if defined(PETSC_USE_CTABLE) 1135 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1136 #else 1137 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1138 #endif 1139 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1140 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1141 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1142 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1143 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1144 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1145 1146 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1147 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1148 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1149 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 1150 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1151 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1152 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1153 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1154 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1155 #if defined(PETSC_HAVE_ELEMENTAL) 1156 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1157 #endif 1158 PetscFunctionReturn(0); 1159 } 1160 1161 #undef __FUNCT__ 1162 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1163 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1164 { 1165 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1166 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1167 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1168 PetscErrorCode ierr; 1169 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1170 int fd; 1171 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1172 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1173 PetscScalar *column_values; 1174 PetscInt message_count,flowcontrolcount; 1175 FILE *file; 1176 1177 PetscFunctionBegin; 1178 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1179 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1180 nz = A->nz + B->nz; 1181 if (!rank) { 1182 header[0] = MAT_FILE_CLASSID; 1183 header[1] = mat->rmap->N; 1184 header[2] = mat->cmap->N; 1185 1186 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1187 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1188 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1189 /* get largest number of rows any processor has */ 1190 rlen = mat->rmap->n; 1191 range = mat->rmap->range; 1192 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1193 } else { 1194 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1195 rlen = mat->rmap->n; 1196 } 1197 1198 /* load up the local row counts */ 1199 ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr); 1200 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1201 1202 /* store the row lengths to the file */ 1203 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1204 if (!rank) { 1205 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1206 for (i=1; i<size; i++) { 1207 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1208 rlen = range[i+1] - range[i]; 1209 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1210 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1211 } 1212 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1213 } else { 1214 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1215 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1216 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1217 } 1218 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1219 1220 /* load up the local column indices */ 1221 nzmax = nz; /* th processor needs space a largest processor needs */ 1222 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1223 ierr = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr); 1224 cnt = 0; 1225 for (i=0; i<mat->rmap->n; i++) { 1226 for (j=B->i[i]; j<B->i[i+1]; j++) { 1227 if ((col = garray[B->j[j]]) > cstart) break; 1228 column_indices[cnt++] = col; 1229 } 1230 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1231 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1232 } 1233 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1234 1235 /* store the column indices to the file */ 1236 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1237 if (!rank) { 1238 MPI_Status status; 1239 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1240 for (i=1; i<size; i++) { 1241 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1242 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1243 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1244 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1245 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1246 } 1247 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1248 } else { 1249 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1250 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1251 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1252 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1253 } 1254 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1255 1256 /* load up the local column values */ 1257 ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr); 1258 cnt = 0; 1259 for (i=0; i<mat->rmap->n; i++) { 1260 for (j=B->i[i]; j<B->i[i+1]; j++) { 1261 if (garray[B->j[j]] > cstart) break; 1262 column_values[cnt++] = B->a[j]; 1263 } 1264 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1265 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1266 } 1267 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1268 1269 /* store the column values to the file */ 1270 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1271 if (!rank) { 1272 MPI_Status status; 1273 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1274 for (i=1; i<size; i++) { 1275 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1276 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1277 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1278 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1279 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1280 } 1281 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1282 } else { 1283 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1284 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1285 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1286 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1287 } 1288 ierr = PetscFree(column_values);CHKERRQ(ierr); 1289 1290 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1291 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1292 PetscFunctionReturn(0); 1293 } 1294 1295 #include <petscdraw.h> 1296 #undef __FUNCT__ 1297 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1298 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1299 { 1300 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1301 PetscErrorCode ierr; 1302 PetscMPIInt rank = aij->rank,size = aij->size; 1303 PetscBool isdraw,iascii,isbinary; 1304 PetscViewer sviewer; 1305 PetscViewerFormat format; 1306 1307 PetscFunctionBegin; 1308 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1309 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1310 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1311 if (iascii) { 1312 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1313 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1314 MatInfo info; 1315 PetscBool inodes; 1316 1317 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1318 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1319 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1320 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1321 if (!inodes) { 1322 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1323 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1324 } else { 1325 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1326 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1327 } 1328 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1329 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1330 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1331 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1332 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1333 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1334 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1335 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1336 PetscFunctionReturn(0); 1337 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1338 PetscInt inodecount,inodelimit,*inodes; 1339 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1340 if (inodes) { 1341 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1342 } else { 1343 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1344 } 1345 PetscFunctionReturn(0); 1346 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1347 PetscFunctionReturn(0); 1348 } 1349 } else if (isbinary) { 1350 if (size == 1) { 1351 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1352 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1353 } else { 1354 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1355 } 1356 PetscFunctionReturn(0); 1357 } else if (isdraw) { 1358 PetscDraw draw; 1359 PetscBool isnull; 1360 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1361 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1362 } 1363 1364 { 1365 /* assemble the entire matrix onto first processor. */ 1366 Mat A; 1367 Mat_SeqAIJ *Aloc; 1368 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1369 MatScalar *a; 1370 1371 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1372 if (!rank) { 1373 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1374 } else { 1375 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1376 } 1377 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1378 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1379 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1380 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1381 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1382 1383 /* copy over the A part */ 1384 Aloc = (Mat_SeqAIJ*)aij->A->data; 1385 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1386 row = mat->rmap->rstart; 1387 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1388 for (i=0; i<m; i++) { 1389 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1390 row++; 1391 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1392 } 1393 aj = Aloc->j; 1394 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1395 1396 /* copy over the B part */ 1397 Aloc = (Mat_SeqAIJ*)aij->B->data; 1398 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1399 row = mat->rmap->rstart; 1400 ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr); 1401 ct = cols; 1402 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1403 for (i=0; i<m; i++) { 1404 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1405 row++; 1406 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1407 } 1408 ierr = PetscFree(ct);CHKERRQ(ierr); 1409 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1410 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1411 /* 1412 Everyone has to call to draw the matrix since the graphics waits are 1413 synchronized across all processors that share the PetscDraw object 1414 */ 1415 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1416 if (!rank) { 1417 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1418 } 1419 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1420 ierr = MatDestroy(&A);CHKERRQ(ierr); 1421 } 1422 PetscFunctionReturn(0); 1423 } 1424 1425 #undef __FUNCT__ 1426 #define __FUNCT__ "MatView_MPIAIJ" 1427 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1428 { 1429 PetscErrorCode ierr; 1430 PetscBool iascii,isdraw,issocket,isbinary; 1431 1432 PetscFunctionBegin; 1433 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1434 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1435 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1436 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1437 if (iascii || isdraw || isbinary || issocket) { 1438 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1439 } 1440 PetscFunctionReturn(0); 1441 } 1442 1443 #undef __FUNCT__ 1444 #define __FUNCT__ "MatSOR_MPIAIJ" 1445 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1446 { 1447 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1448 PetscErrorCode ierr; 1449 Vec bb1 = 0; 1450 PetscBool hasop; 1451 1452 PetscFunctionBegin; 1453 if (flag == SOR_APPLY_UPPER) { 1454 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1455 PetscFunctionReturn(0); 1456 } 1457 1458 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1459 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1460 } 1461 1462 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1463 if (flag & SOR_ZERO_INITIAL_GUESS) { 1464 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1465 its--; 1466 } 1467 1468 while (its--) { 1469 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1470 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1471 1472 /* update rhs: bb1 = bb - B*x */ 1473 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1474 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1475 1476 /* local sweep */ 1477 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1478 } 1479 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1480 if (flag & SOR_ZERO_INITIAL_GUESS) { 1481 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1482 its--; 1483 } 1484 while (its--) { 1485 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1486 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1487 1488 /* update rhs: bb1 = bb - B*x */ 1489 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1490 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1491 1492 /* local sweep */ 1493 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1494 } 1495 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1496 if (flag & SOR_ZERO_INITIAL_GUESS) { 1497 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1498 its--; 1499 } 1500 while (its--) { 1501 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1502 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1503 1504 /* update rhs: bb1 = bb - B*x */ 1505 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1506 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1507 1508 /* local sweep */ 1509 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1510 } 1511 } else if (flag & SOR_EISENSTAT) { 1512 Vec xx1; 1513 1514 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1515 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1516 1517 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1518 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1519 if (!mat->diag) { 1520 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1521 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1522 } 1523 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1524 if (hasop) { 1525 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1526 } else { 1527 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1528 } 1529 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1530 1531 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1532 1533 /* local sweep */ 1534 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1535 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1536 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1537 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1538 1539 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1540 PetscFunctionReturn(0); 1541 } 1542 1543 #undef __FUNCT__ 1544 #define __FUNCT__ "MatPermute_MPIAIJ" 1545 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1546 { 1547 Mat aA,aB,Aperm; 1548 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1549 PetscScalar *aa,*ba; 1550 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1551 PetscSF rowsf,sf; 1552 IS parcolp = NULL; 1553 PetscBool done; 1554 PetscErrorCode ierr; 1555 1556 PetscFunctionBegin; 1557 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1558 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1559 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1560 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1561 1562 /* Invert row permutation to find out where my rows should go */ 1563 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1564 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1565 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1566 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1567 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1568 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1569 1570 /* Invert column permutation to find out where my columns should go */ 1571 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1572 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1573 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1574 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1575 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1576 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1577 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1578 1579 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1580 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1581 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1582 1583 /* Find out where my gcols should go */ 1584 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1585 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1586 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1587 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1588 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1589 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1590 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1591 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1592 1593 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1594 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1595 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1596 for (i=0; i<m; i++) { 1597 PetscInt row = rdest[i],rowner; 1598 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1599 for (j=ai[i]; j<ai[i+1]; j++) { 1600 PetscInt cowner,col = cdest[aj[j]]; 1601 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1602 if (rowner == cowner) dnnz[i]++; 1603 else onnz[i]++; 1604 } 1605 for (j=bi[i]; j<bi[i+1]; j++) { 1606 PetscInt cowner,col = gcdest[bj[j]]; 1607 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1608 if (rowner == cowner) dnnz[i]++; 1609 else onnz[i]++; 1610 } 1611 } 1612 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1613 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1614 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1615 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1616 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1617 1618 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1619 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1620 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1621 for (i=0; i<m; i++) { 1622 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1623 PetscInt j0,rowlen; 1624 rowlen = ai[i+1] - ai[i]; 1625 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1626 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1627 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1628 } 1629 rowlen = bi[i+1] - bi[i]; 1630 for (j0=j=0; j<rowlen; j0=j) { 1631 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1632 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1633 } 1634 } 1635 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1636 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1637 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1638 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1639 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1640 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1641 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1642 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1643 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1644 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1645 *B = Aperm; 1646 PetscFunctionReturn(0); 1647 } 1648 1649 #undef __FUNCT__ 1650 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1651 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1652 { 1653 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1654 Mat A = mat->A,B = mat->B; 1655 PetscErrorCode ierr; 1656 PetscReal isend[5],irecv[5]; 1657 1658 PetscFunctionBegin; 1659 info->block_size = 1.0; 1660 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1661 1662 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1663 isend[3] = info->memory; isend[4] = info->mallocs; 1664 1665 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1666 1667 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1668 isend[3] += info->memory; isend[4] += info->mallocs; 1669 if (flag == MAT_LOCAL) { 1670 info->nz_used = isend[0]; 1671 info->nz_allocated = isend[1]; 1672 info->nz_unneeded = isend[2]; 1673 info->memory = isend[3]; 1674 info->mallocs = isend[4]; 1675 } else if (flag == MAT_GLOBAL_MAX) { 1676 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1677 1678 info->nz_used = irecv[0]; 1679 info->nz_allocated = irecv[1]; 1680 info->nz_unneeded = irecv[2]; 1681 info->memory = irecv[3]; 1682 info->mallocs = irecv[4]; 1683 } else if (flag == MAT_GLOBAL_SUM) { 1684 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1685 1686 info->nz_used = irecv[0]; 1687 info->nz_allocated = irecv[1]; 1688 info->nz_unneeded = irecv[2]; 1689 info->memory = irecv[3]; 1690 info->mallocs = irecv[4]; 1691 } 1692 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1693 info->fill_ratio_needed = 0; 1694 info->factor_mallocs = 0; 1695 PetscFunctionReturn(0); 1696 } 1697 1698 #undef __FUNCT__ 1699 #define __FUNCT__ "MatSetOption_MPIAIJ" 1700 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1701 { 1702 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1703 PetscErrorCode ierr; 1704 1705 PetscFunctionBegin; 1706 switch (op) { 1707 case MAT_NEW_NONZERO_LOCATIONS: 1708 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1709 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1710 case MAT_KEEP_NONZERO_PATTERN: 1711 case MAT_NEW_NONZERO_LOCATION_ERR: 1712 case MAT_USE_INODES: 1713 case MAT_IGNORE_ZERO_ENTRIES: 1714 MatCheckPreallocated(A,1); 1715 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1716 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1717 break; 1718 case MAT_ROW_ORIENTED: 1719 a->roworiented = flg; 1720 1721 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1722 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1723 break; 1724 case MAT_NEW_DIAGONALS: 1725 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1726 break; 1727 case MAT_IGNORE_OFF_PROC_ENTRIES: 1728 a->donotstash = flg; 1729 break; 1730 case MAT_SPD: 1731 A->spd_set = PETSC_TRUE; 1732 A->spd = flg; 1733 if (flg) { 1734 A->symmetric = PETSC_TRUE; 1735 A->structurally_symmetric = PETSC_TRUE; 1736 A->symmetric_set = PETSC_TRUE; 1737 A->structurally_symmetric_set = PETSC_TRUE; 1738 } 1739 break; 1740 case MAT_SYMMETRIC: 1741 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1742 break; 1743 case MAT_STRUCTURALLY_SYMMETRIC: 1744 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1745 break; 1746 case MAT_HERMITIAN: 1747 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1748 break; 1749 case MAT_SYMMETRY_ETERNAL: 1750 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1751 break; 1752 default: 1753 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1754 } 1755 PetscFunctionReturn(0); 1756 } 1757 1758 #undef __FUNCT__ 1759 #define __FUNCT__ "MatGetRow_MPIAIJ" 1760 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1761 { 1762 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1763 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1764 PetscErrorCode ierr; 1765 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1766 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1767 PetscInt *cmap,*idx_p; 1768 1769 PetscFunctionBegin; 1770 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1771 mat->getrowactive = PETSC_TRUE; 1772 1773 if (!mat->rowvalues && (idx || v)) { 1774 /* 1775 allocate enough space to hold information from the longest row. 1776 */ 1777 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1778 PetscInt max = 1,tmp; 1779 for (i=0; i<matin->rmap->n; i++) { 1780 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1781 if (max < tmp) max = tmp; 1782 } 1783 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1784 } 1785 1786 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1787 lrow = row - rstart; 1788 1789 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1790 if (!v) {pvA = 0; pvB = 0;} 1791 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1792 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1793 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1794 nztot = nzA + nzB; 1795 1796 cmap = mat->garray; 1797 if (v || idx) { 1798 if (nztot) { 1799 /* Sort by increasing column numbers, assuming A and B already sorted */ 1800 PetscInt imark = -1; 1801 if (v) { 1802 *v = v_p = mat->rowvalues; 1803 for (i=0; i<nzB; i++) { 1804 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1805 else break; 1806 } 1807 imark = i; 1808 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1809 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1810 } 1811 if (idx) { 1812 *idx = idx_p = mat->rowindices; 1813 if (imark > -1) { 1814 for (i=0; i<imark; i++) { 1815 idx_p[i] = cmap[cworkB[i]]; 1816 } 1817 } else { 1818 for (i=0; i<nzB; i++) { 1819 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1820 else break; 1821 } 1822 imark = i; 1823 } 1824 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1825 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1826 } 1827 } else { 1828 if (idx) *idx = 0; 1829 if (v) *v = 0; 1830 } 1831 } 1832 *nz = nztot; 1833 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1834 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1835 PetscFunctionReturn(0); 1836 } 1837 1838 #undef __FUNCT__ 1839 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1840 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1841 { 1842 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1843 1844 PetscFunctionBegin; 1845 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1846 aij->getrowactive = PETSC_FALSE; 1847 PetscFunctionReturn(0); 1848 } 1849 1850 #undef __FUNCT__ 1851 #define __FUNCT__ "MatNorm_MPIAIJ" 1852 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1853 { 1854 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1855 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1856 PetscErrorCode ierr; 1857 PetscInt i,j,cstart = mat->cmap->rstart; 1858 PetscReal sum = 0.0; 1859 MatScalar *v; 1860 1861 PetscFunctionBegin; 1862 if (aij->size == 1) { 1863 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1864 } else { 1865 if (type == NORM_FROBENIUS) { 1866 v = amat->a; 1867 for (i=0; i<amat->nz; i++) { 1868 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1869 } 1870 v = bmat->a; 1871 for (i=0; i<bmat->nz; i++) { 1872 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1873 } 1874 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1875 *norm = PetscSqrtReal(*norm); 1876 } else if (type == NORM_1) { /* max column norm */ 1877 PetscReal *tmp,*tmp2; 1878 PetscInt *jj,*garray = aij->garray; 1879 ierr = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr); 1880 ierr = PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr); 1881 *norm = 0.0; 1882 v = amat->a; jj = amat->j; 1883 for (j=0; j<amat->nz; j++) { 1884 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1885 } 1886 v = bmat->a; jj = bmat->j; 1887 for (j=0; j<bmat->nz; j++) { 1888 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1889 } 1890 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1891 for (j=0; j<mat->cmap->N; j++) { 1892 if (tmp2[j] > *norm) *norm = tmp2[j]; 1893 } 1894 ierr = PetscFree(tmp);CHKERRQ(ierr); 1895 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1896 } else if (type == NORM_INFINITY) { /* max row norm */ 1897 PetscReal ntemp = 0.0; 1898 for (j=0; j<aij->A->rmap->n; j++) { 1899 v = amat->a + amat->i[j]; 1900 sum = 0.0; 1901 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1902 sum += PetscAbsScalar(*v); v++; 1903 } 1904 v = bmat->a + bmat->i[j]; 1905 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1906 sum += PetscAbsScalar(*v); v++; 1907 } 1908 if (sum > ntemp) ntemp = sum; 1909 } 1910 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1911 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1912 } 1913 PetscFunctionReturn(0); 1914 } 1915 1916 #undef __FUNCT__ 1917 #define __FUNCT__ "MatTranspose_MPIAIJ" 1918 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1919 { 1920 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1921 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1922 PetscErrorCode ierr; 1923 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1924 PetscInt cstart = A->cmap->rstart,ncol; 1925 Mat B; 1926 MatScalar *array; 1927 1928 PetscFunctionBegin; 1929 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1930 1931 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1932 ai = Aloc->i; aj = Aloc->j; 1933 bi = Bloc->i; bj = Bloc->j; 1934 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1935 PetscInt *d_nnz,*g_nnz,*o_nnz; 1936 PetscSFNode *oloc; 1937 PETSC_UNUSED PetscSF sf; 1938 1939 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1940 /* compute d_nnz for preallocation */ 1941 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1942 for (i=0; i<ai[ma]; i++) { 1943 d_nnz[aj[i]]++; 1944 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1945 } 1946 /* compute local off-diagonal contributions */ 1947 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1948 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1949 /* map those to global */ 1950 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1951 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1952 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1953 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1954 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1955 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1956 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1957 1958 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1959 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1960 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1961 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1962 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1963 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 1964 } else { 1965 B = *matout; 1966 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1967 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1968 } 1969 1970 /* copy over the A part */ 1971 array = Aloc->a; 1972 row = A->rmap->rstart; 1973 for (i=0; i<ma; i++) { 1974 ncol = ai[i+1]-ai[i]; 1975 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1976 row++; 1977 array += ncol; aj += ncol; 1978 } 1979 aj = Aloc->j; 1980 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1981 1982 /* copy over the B part */ 1983 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 1984 array = Bloc->a; 1985 row = A->rmap->rstart; 1986 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1987 cols_tmp = cols; 1988 for (i=0; i<mb; i++) { 1989 ncol = bi[i+1]-bi[i]; 1990 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1991 row++; 1992 array += ncol; cols_tmp += ncol; 1993 } 1994 ierr = PetscFree(cols);CHKERRQ(ierr); 1995 1996 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1997 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1998 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1999 *matout = B; 2000 } else { 2001 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2002 } 2003 PetscFunctionReturn(0); 2004 } 2005 2006 #undef __FUNCT__ 2007 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2008 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2009 { 2010 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2011 Mat a = aij->A,b = aij->B; 2012 PetscErrorCode ierr; 2013 PetscInt s1,s2,s3; 2014 2015 PetscFunctionBegin; 2016 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2017 if (rr) { 2018 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2019 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2020 /* Overlap communication with computation. */ 2021 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2022 } 2023 if (ll) { 2024 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2025 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2026 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2027 } 2028 /* scale the diagonal block */ 2029 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2030 2031 if (rr) { 2032 /* Do a scatter end and then right scale the off-diagonal block */ 2033 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2034 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2035 } 2036 PetscFunctionReturn(0); 2037 } 2038 2039 #undef __FUNCT__ 2040 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2041 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2042 { 2043 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2044 PetscErrorCode ierr; 2045 2046 PetscFunctionBegin; 2047 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2048 PetscFunctionReturn(0); 2049 } 2050 2051 #undef __FUNCT__ 2052 #define __FUNCT__ "MatEqual_MPIAIJ" 2053 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2054 { 2055 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2056 Mat a,b,c,d; 2057 PetscBool flg; 2058 PetscErrorCode ierr; 2059 2060 PetscFunctionBegin; 2061 a = matA->A; b = matA->B; 2062 c = matB->A; d = matB->B; 2063 2064 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2065 if (flg) { 2066 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2067 } 2068 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2069 PetscFunctionReturn(0); 2070 } 2071 2072 #undef __FUNCT__ 2073 #define __FUNCT__ "MatCopy_MPIAIJ" 2074 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2075 { 2076 PetscErrorCode ierr; 2077 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2078 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2079 2080 PetscFunctionBegin; 2081 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2082 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2083 /* because of the column compression in the off-processor part of the matrix a->B, 2084 the number of columns in a->B and b->B may be different, hence we cannot call 2085 the MatCopy() directly on the two parts. If need be, we can provide a more 2086 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2087 then copying the submatrices */ 2088 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2089 } else { 2090 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2091 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2092 } 2093 PetscFunctionReturn(0); 2094 } 2095 2096 #undef __FUNCT__ 2097 #define __FUNCT__ "MatSetUp_MPIAIJ" 2098 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2099 { 2100 PetscErrorCode ierr; 2101 2102 PetscFunctionBegin; 2103 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2104 PetscFunctionReturn(0); 2105 } 2106 2107 /* 2108 Computes the number of nonzeros per row needed for preallocation when X and Y 2109 have different nonzero structure. 2110 */ 2111 #undef __FUNCT__ 2112 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private" 2113 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2114 { 2115 PetscInt i,j,k,nzx,nzy; 2116 2117 PetscFunctionBegin; 2118 /* Set the number of nonzeros in the new matrix */ 2119 for (i=0; i<m; i++) { 2120 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2121 nzx = xi[i+1] - xi[i]; 2122 nzy = yi[i+1] - yi[i]; 2123 nnz[i] = 0; 2124 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2125 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2126 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2127 nnz[i]++; 2128 } 2129 for (; k<nzy; k++) nnz[i]++; 2130 } 2131 PetscFunctionReturn(0); 2132 } 2133 2134 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2135 #undef __FUNCT__ 2136 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2137 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2138 { 2139 PetscErrorCode ierr; 2140 PetscInt m = Y->rmap->N; 2141 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2142 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2143 2144 PetscFunctionBegin; 2145 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2146 PetscFunctionReturn(0); 2147 } 2148 2149 #undef __FUNCT__ 2150 #define __FUNCT__ "MatAXPY_MPIAIJ" 2151 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2152 { 2153 PetscErrorCode ierr; 2154 PetscInt i; 2155 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2156 PetscBLASInt bnz,one=1; 2157 Mat_SeqAIJ *x,*y; 2158 2159 PetscFunctionBegin; 2160 if (str == SAME_NONZERO_PATTERN) { 2161 PetscScalar alpha = a; 2162 x = (Mat_SeqAIJ*)xx->A->data; 2163 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2164 y = (Mat_SeqAIJ*)yy->A->data; 2165 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2166 x = (Mat_SeqAIJ*)xx->B->data; 2167 y = (Mat_SeqAIJ*)yy->B->data; 2168 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2169 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2170 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2171 } else if (str == SUBSET_NONZERO_PATTERN) { 2172 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2173 2174 x = (Mat_SeqAIJ*)xx->B->data; 2175 y = (Mat_SeqAIJ*)yy->B->data; 2176 if (y->xtoy && y->XtoY != xx->B) { 2177 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2178 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2179 } 2180 if (!y->xtoy) { /* get xtoy */ 2181 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2182 y->XtoY = xx->B; 2183 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2184 } 2185 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2186 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2187 } else { 2188 Mat B; 2189 PetscInt *nnz_d,*nnz_o; 2190 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2191 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2192 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2193 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2194 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2195 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2196 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2197 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2198 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2199 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2200 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2201 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2202 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2203 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2204 } 2205 PetscFunctionReturn(0); 2206 } 2207 2208 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2209 2210 #undef __FUNCT__ 2211 #define __FUNCT__ "MatConjugate_MPIAIJ" 2212 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2213 { 2214 #if defined(PETSC_USE_COMPLEX) 2215 PetscErrorCode ierr; 2216 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2217 2218 PetscFunctionBegin; 2219 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2220 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2221 #else 2222 PetscFunctionBegin; 2223 #endif 2224 PetscFunctionReturn(0); 2225 } 2226 2227 #undef __FUNCT__ 2228 #define __FUNCT__ "MatRealPart_MPIAIJ" 2229 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2230 { 2231 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2232 PetscErrorCode ierr; 2233 2234 PetscFunctionBegin; 2235 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2236 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2237 PetscFunctionReturn(0); 2238 } 2239 2240 #undef __FUNCT__ 2241 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2242 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2243 { 2244 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2245 PetscErrorCode ierr; 2246 2247 PetscFunctionBegin; 2248 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2249 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2250 PetscFunctionReturn(0); 2251 } 2252 2253 #if defined(PETSC_HAVE_PBGL) 2254 2255 #include <boost/parallel/mpi/bsp_process_group.hpp> 2256 #include <boost/graph/distributed/ilu_default_graph.hpp> 2257 #include <boost/graph/distributed/ilu_0_block.hpp> 2258 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2259 #include <boost/graph/distributed/petsc/interface.hpp> 2260 #include <boost/multi_array.hpp> 2261 #include <boost/parallel/distributed_property_map->hpp> 2262 2263 #undef __FUNCT__ 2264 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2265 /* 2266 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2267 */ 2268 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2269 { 2270 namespace petsc = boost::distributed::petsc; 2271 2272 namespace graph_dist = boost::graph::distributed; 2273 using boost::graph::distributed::ilu_default::process_group_type; 2274 using boost::graph::ilu_permuted; 2275 2276 PetscBool row_identity, col_identity; 2277 PetscContainer c; 2278 PetscInt m, n, M, N; 2279 PetscErrorCode ierr; 2280 2281 PetscFunctionBegin; 2282 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2283 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2284 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2285 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2286 2287 process_group_type pg; 2288 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2289 lgraph_type *lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2290 lgraph_type& level_graph = *lgraph_p; 2291 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2292 2293 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2294 ilu_permuted(level_graph); 2295 2296 /* put together the new matrix */ 2297 ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr); 2298 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2299 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2300 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2301 ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr); 2302 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2303 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2304 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2305 2306 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c); 2307 ierr = PetscContainerSetPointer(c, lgraph_p); 2308 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2309 ierr = PetscContainerDestroy(&c); 2310 PetscFunctionReturn(0); 2311 } 2312 2313 #undef __FUNCT__ 2314 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2315 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2316 { 2317 PetscFunctionBegin; 2318 PetscFunctionReturn(0); 2319 } 2320 2321 #undef __FUNCT__ 2322 #define __FUNCT__ "MatSolve_MPIAIJ" 2323 /* 2324 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2325 */ 2326 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2327 { 2328 namespace graph_dist = boost::graph::distributed; 2329 2330 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2331 lgraph_type *lgraph_p; 2332 PetscContainer c; 2333 PetscErrorCode ierr; 2334 2335 PetscFunctionBegin; 2336 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr); 2337 ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr); 2338 ierr = VecCopy(b, x);CHKERRQ(ierr); 2339 2340 PetscScalar *array_x; 2341 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2342 PetscInt sx; 2343 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2344 2345 PetscScalar *array_b; 2346 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2347 PetscInt sb; 2348 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2349 2350 lgraph_type& level_graph = *lgraph_p; 2351 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2352 2353 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2354 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]); 2355 array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]); 2356 2357 typedef boost::iterator_property_map<array_ref_type::iterator, 2358 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2359 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)); 2360 gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2361 2362 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2363 PetscFunctionReturn(0); 2364 } 2365 #endif 2366 2367 #undef __FUNCT__ 2368 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2369 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2370 { 2371 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2372 PetscErrorCode ierr; 2373 PetscInt i,*idxb = 0; 2374 PetscScalar *va,*vb; 2375 Vec vtmp; 2376 2377 PetscFunctionBegin; 2378 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2379 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2380 if (idx) { 2381 for (i=0; i<A->rmap->n; i++) { 2382 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2383 } 2384 } 2385 2386 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2387 if (idx) { 2388 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2389 } 2390 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2391 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2392 2393 for (i=0; i<A->rmap->n; i++) { 2394 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2395 va[i] = vb[i]; 2396 if (idx) idx[i] = a->garray[idxb[i]]; 2397 } 2398 } 2399 2400 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2401 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2402 ierr = PetscFree(idxb);CHKERRQ(ierr); 2403 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2404 PetscFunctionReturn(0); 2405 } 2406 2407 #undef __FUNCT__ 2408 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2409 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2410 { 2411 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2412 PetscErrorCode ierr; 2413 PetscInt i,*idxb = 0; 2414 PetscScalar *va,*vb; 2415 Vec vtmp; 2416 2417 PetscFunctionBegin; 2418 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2419 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2420 if (idx) { 2421 for (i=0; i<A->cmap->n; i++) { 2422 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2423 } 2424 } 2425 2426 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2427 if (idx) { 2428 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2429 } 2430 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2431 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2432 2433 for (i=0; i<A->rmap->n; i++) { 2434 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2435 va[i] = vb[i]; 2436 if (idx) idx[i] = a->garray[idxb[i]]; 2437 } 2438 } 2439 2440 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2441 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2442 ierr = PetscFree(idxb);CHKERRQ(ierr); 2443 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2444 PetscFunctionReturn(0); 2445 } 2446 2447 #undef __FUNCT__ 2448 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2449 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2450 { 2451 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2452 PetscInt n = A->rmap->n; 2453 PetscInt cstart = A->cmap->rstart; 2454 PetscInt *cmap = mat->garray; 2455 PetscInt *diagIdx, *offdiagIdx; 2456 Vec diagV, offdiagV; 2457 PetscScalar *a, *diagA, *offdiagA; 2458 PetscInt r; 2459 PetscErrorCode ierr; 2460 2461 PetscFunctionBegin; 2462 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2463 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2464 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 2465 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2466 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2467 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2468 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2469 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2470 for (r = 0; r < n; ++r) { 2471 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2472 a[r] = diagA[r]; 2473 idx[r] = cstart + diagIdx[r]; 2474 } else { 2475 a[r] = offdiagA[r]; 2476 idx[r] = cmap[offdiagIdx[r]]; 2477 } 2478 } 2479 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2480 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2481 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2482 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2483 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2484 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2485 PetscFunctionReturn(0); 2486 } 2487 2488 #undef __FUNCT__ 2489 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2490 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2491 { 2492 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2493 PetscInt n = A->rmap->n; 2494 PetscInt cstart = A->cmap->rstart; 2495 PetscInt *cmap = mat->garray; 2496 PetscInt *diagIdx, *offdiagIdx; 2497 Vec diagV, offdiagV; 2498 PetscScalar *a, *diagA, *offdiagA; 2499 PetscInt r; 2500 PetscErrorCode ierr; 2501 2502 PetscFunctionBegin; 2503 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2504 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2505 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2506 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2507 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2508 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2509 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2510 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2511 for (r = 0; r < n; ++r) { 2512 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2513 a[r] = diagA[r]; 2514 idx[r] = cstart + diagIdx[r]; 2515 } else { 2516 a[r] = offdiagA[r]; 2517 idx[r] = cmap[offdiagIdx[r]]; 2518 } 2519 } 2520 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2521 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2522 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2523 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2524 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2525 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2526 PetscFunctionReturn(0); 2527 } 2528 2529 #undef __FUNCT__ 2530 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 2531 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2532 { 2533 PetscErrorCode ierr; 2534 Mat *dummy; 2535 2536 PetscFunctionBegin; 2537 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2538 *newmat = *dummy; 2539 ierr = PetscFree(dummy);CHKERRQ(ierr); 2540 PetscFunctionReturn(0); 2541 } 2542 2543 #undef __FUNCT__ 2544 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 2545 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2546 { 2547 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2548 PetscErrorCode ierr; 2549 2550 PetscFunctionBegin; 2551 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2552 PetscFunctionReturn(0); 2553 } 2554 2555 #undef __FUNCT__ 2556 #define __FUNCT__ "MatSetRandom_MPIAIJ" 2557 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2558 { 2559 PetscErrorCode ierr; 2560 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2561 2562 PetscFunctionBegin; 2563 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2564 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2565 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2566 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2567 PetscFunctionReturn(0); 2568 } 2569 2570 /* -------------------------------------------------------------------*/ 2571 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2572 MatGetRow_MPIAIJ, 2573 MatRestoreRow_MPIAIJ, 2574 MatMult_MPIAIJ, 2575 /* 4*/ MatMultAdd_MPIAIJ, 2576 MatMultTranspose_MPIAIJ, 2577 MatMultTransposeAdd_MPIAIJ, 2578 #if defined(PETSC_HAVE_PBGL) 2579 MatSolve_MPIAIJ, 2580 #else 2581 0, 2582 #endif 2583 0, 2584 0, 2585 /*10*/ 0, 2586 0, 2587 0, 2588 MatSOR_MPIAIJ, 2589 MatTranspose_MPIAIJ, 2590 /*15*/ MatGetInfo_MPIAIJ, 2591 MatEqual_MPIAIJ, 2592 MatGetDiagonal_MPIAIJ, 2593 MatDiagonalScale_MPIAIJ, 2594 MatNorm_MPIAIJ, 2595 /*20*/ MatAssemblyBegin_MPIAIJ, 2596 MatAssemblyEnd_MPIAIJ, 2597 MatSetOption_MPIAIJ, 2598 MatZeroEntries_MPIAIJ, 2599 /*24*/ MatZeroRows_MPIAIJ, 2600 0, 2601 #if defined(PETSC_HAVE_PBGL) 2602 0, 2603 #else 2604 0, 2605 #endif 2606 0, 2607 0, 2608 /*29*/ MatSetUp_MPIAIJ, 2609 #if defined(PETSC_HAVE_PBGL) 2610 0, 2611 #else 2612 0, 2613 #endif 2614 0, 2615 0, 2616 0, 2617 /*34*/ MatDuplicate_MPIAIJ, 2618 0, 2619 0, 2620 0, 2621 0, 2622 /*39*/ MatAXPY_MPIAIJ, 2623 MatGetSubMatrices_MPIAIJ, 2624 MatIncreaseOverlap_MPIAIJ, 2625 MatGetValues_MPIAIJ, 2626 MatCopy_MPIAIJ, 2627 /*44*/ MatGetRowMax_MPIAIJ, 2628 MatScale_MPIAIJ, 2629 0, 2630 MatDiagonalSet_MPIAIJ, 2631 MatZeroRowsColumns_MPIAIJ, 2632 /*49*/ MatSetRandom_MPIAIJ, 2633 0, 2634 0, 2635 0, 2636 0, 2637 /*54*/ MatFDColoringCreate_MPIXAIJ, 2638 0, 2639 MatSetUnfactored_MPIAIJ, 2640 MatPermute_MPIAIJ, 2641 0, 2642 /*59*/ MatGetSubMatrix_MPIAIJ, 2643 MatDestroy_MPIAIJ, 2644 MatView_MPIAIJ, 2645 0, 2646 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2647 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2648 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2649 0, 2650 0, 2651 0, 2652 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2653 MatGetRowMinAbs_MPIAIJ, 2654 0, 2655 MatSetColoring_MPIAIJ, 2656 0, 2657 MatSetValuesAdifor_MPIAIJ, 2658 /*75*/ MatFDColoringApply_AIJ, 2659 0, 2660 0, 2661 0, 2662 MatFindZeroDiagonals_MPIAIJ, 2663 /*80*/ 0, 2664 0, 2665 0, 2666 /*83*/ MatLoad_MPIAIJ, 2667 0, 2668 0, 2669 0, 2670 0, 2671 0, 2672 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2673 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2674 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2675 MatPtAP_MPIAIJ_MPIAIJ, 2676 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2677 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2678 0, 2679 0, 2680 0, 2681 0, 2682 /*99*/ 0, 2683 0, 2684 0, 2685 MatConjugate_MPIAIJ, 2686 0, 2687 /*104*/MatSetValuesRow_MPIAIJ, 2688 MatRealPart_MPIAIJ, 2689 MatImaginaryPart_MPIAIJ, 2690 0, 2691 0, 2692 /*109*/0, 2693 0, 2694 MatGetRowMin_MPIAIJ, 2695 0, 2696 0, 2697 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2698 0, 2699 0, 2700 0, 2701 0, 2702 /*119*/0, 2703 0, 2704 0, 2705 0, 2706 MatGetMultiProcBlock_MPIAIJ, 2707 /*124*/MatFindNonzeroRows_MPIAIJ, 2708 MatGetColumnNorms_MPIAIJ, 2709 MatInvertBlockDiagonal_MPIAIJ, 2710 0, 2711 MatGetSubMatricesParallel_MPIAIJ, 2712 /*129*/0, 2713 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2714 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2715 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2716 0, 2717 /*134*/0, 2718 0, 2719 0, 2720 0, 2721 0, 2722 /*139*/0, 2723 0, 2724 0, 2725 MatFDColoringSetUp_MPIXAIJ, 2726 0, 2727 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2728 }; 2729 2730 /* ----------------------------------------------------------------------------------------*/ 2731 2732 #undef __FUNCT__ 2733 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2734 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2735 { 2736 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2737 PetscErrorCode ierr; 2738 2739 PetscFunctionBegin; 2740 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2741 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2742 PetscFunctionReturn(0); 2743 } 2744 2745 #undef __FUNCT__ 2746 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2747 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2748 { 2749 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2750 PetscErrorCode ierr; 2751 2752 PetscFunctionBegin; 2753 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2754 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2755 PetscFunctionReturn(0); 2756 } 2757 2758 #undef __FUNCT__ 2759 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2760 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2761 { 2762 Mat_MPIAIJ *b; 2763 PetscErrorCode ierr; 2764 2765 PetscFunctionBegin; 2766 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2767 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2768 b = (Mat_MPIAIJ*)B->data; 2769 2770 if (!B->preallocated) { 2771 /* Explicitly create 2 MATSEQAIJ matrices. */ 2772 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2773 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2774 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2775 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2776 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2777 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2778 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2779 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2780 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2781 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2782 } 2783 2784 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2785 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2786 B->preallocated = PETSC_TRUE; 2787 PetscFunctionReturn(0); 2788 } 2789 2790 #undef __FUNCT__ 2791 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2792 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2793 { 2794 Mat mat; 2795 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2796 PetscErrorCode ierr; 2797 2798 PetscFunctionBegin; 2799 *newmat = 0; 2800 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2801 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2802 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2803 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2804 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2805 a = (Mat_MPIAIJ*)mat->data; 2806 2807 mat->factortype = matin->factortype; 2808 mat->assembled = PETSC_TRUE; 2809 mat->insertmode = NOT_SET_VALUES; 2810 mat->preallocated = PETSC_TRUE; 2811 2812 a->size = oldmat->size; 2813 a->rank = oldmat->rank; 2814 a->donotstash = oldmat->donotstash; 2815 a->roworiented = oldmat->roworiented; 2816 a->rowindices = 0; 2817 a->rowvalues = 0; 2818 a->getrowactive = PETSC_FALSE; 2819 2820 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2821 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2822 2823 if (oldmat->colmap) { 2824 #if defined(PETSC_USE_CTABLE) 2825 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2826 #else 2827 ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr); 2828 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2829 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2830 #endif 2831 } else a->colmap = 0; 2832 if (oldmat->garray) { 2833 PetscInt len; 2834 len = oldmat->B->cmap->n; 2835 ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr); 2836 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2837 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2838 } else a->garray = 0; 2839 2840 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2841 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2842 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2843 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2844 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2845 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2846 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2847 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2848 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2849 *newmat = mat; 2850 PetscFunctionReturn(0); 2851 } 2852 2853 2854 2855 #undef __FUNCT__ 2856 #define __FUNCT__ "MatLoad_MPIAIJ" 2857 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2858 { 2859 PetscScalar *vals,*svals; 2860 MPI_Comm comm; 2861 PetscErrorCode ierr; 2862 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2863 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 2864 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2865 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2866 PetscInt cend,cstart,n,*rowners,sizesset=1; 2867 int fd; 2868 PetscInt bs = 1; 2869 2870 PetscFunctionBegin; 2871 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2872 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2873 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2874 if (!rank) { 2875 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2876 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2877 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2878 } 2879 2880 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 2881 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2882 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2883 2884 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 2885 2886 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2887 M = header[1]; N = header[2]; 2888 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 2889 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 2890 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 2891 2892 /* If global sizes are set, check if they are consistent with that given in the file */ 2893 if (sizesset) { 2894 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 2895 } 2896 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 2897 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 2898 2899 /* determine ownership of all (block) rows */ 2900 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 2901 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 2902 else m = newMat->rmap->n; /* Set by user */ 2903 2904 ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr); 2905 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2906 2907 /* First process needs enough room for process with most rows */ 2908 if (!rank) { 2909 mmax = rowners[1]; 2910 for (i=2; i<=size; i++) { 2911 mmax = PetscMax(mmax, rowners[i]); 2912 } 2913 } else mmax = -1; /* unused, but compilers complain */ 2914 2915 rowners[0] = 0; 2916 for (i=2; i<=size; i++) { 2917 rowners[i] += rowners[i-1]; 2918 } 2919 rstart = rowners[rank]; 2920 rend = rowners[rank+1]; 2921 2922 /* distribute row lengths to all processors */ 2923 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 2924 if (!rank) { 2925 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2926 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 2927 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 2928 for (j=0; j<m; j++) { 2929 procsnz[0] += ourlens[j]; 2930 } 2931 for (i=1; i<size; i++) { 2932 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2933 /* calculate the number of nonzeros on each processor */ 2934 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2935 procsnz[i] += rowlengths[j]; 2936 } 2937 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2938 } 2939 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2940 } else { 2941 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2942 } 2943 2944 if (!rank) { 2945 /* determine max buffer needed and allocate it */ 2946 maxnz = 0; 2947 for (i=0; i<size; i++) { 2948 maxnz = PetscMax(maxnz,procsnz[i]); 2949 } 2950 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2951 2952 /* read in my part of the matrix column indices */ 2953 nz = procsnz[0]; 2954 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2955 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2956 2957 /* read in every one elses and ship off */ 2958 for (i=1; i<size; i++) { 2959 nz = procsnz[i]; 2960 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2961 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2962 } 2963 ierr = PetscFree(cols);CHKERRQ(ierr); 2964 } else { 2965 /* determine buffer space needed for message */ 2966 nz = 0; 2967 for (i=0; i<m; i++) { 2968 nz += ourlens[i]; 2969 } 2970 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2971 2972 /* receive message of column indices*/ 2973 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2974 } 2975 2976 /* determine column ownership if matrix is not square */ 2977 if (N != M) { 2978 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 2979 else n = newMat->cmap->n; 2980 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2981 cstart = cend - n; 2982 } else { 2983 cstart = rstart; 2984 cend = rend; 2985 n = cend - cstart; 2986 } 2987 2988 /* loop over local rows, determining number of off diagonal entries */ 2989 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2990 jj = 0; 2991 for (i=0; i<m; i++) { 2992 for (j=0; j<ourlens[i]; j++) { 2993 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2994 jj++; 2995 } 2996 } 2997 2998 for (i=0; i<m; i++) { 2999 ourlens[i] -= offlens[i]; 3000 } 3001 if (!sizesset) { 3002 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3003 } 3004 3005 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3006 3007 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3008 3009 for (i=0; i<m; i++) { 3010 ourlens[i] += offlens[i]; 3011 } 3012 3013 if (!rank) { 3014 ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr); 3015 3016 /* read in my part of the matrix numerical values */ 3017 nz = procsnz[0]; 3018 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3019 3020 /* insert into matrix */ 3021 jj = rstart; 3022 smycols = mycols; 3023 svals = vals; 3024 for (i=0; i<m; i++) { 3025 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3026 smycols += ourlens[i]; 3027 svals += ourlens[i]; 3028 jj++; 3029 } 3030 3031 /* read in other processors and ship out */ 3032 for (i=1; i<size; i++) { 3033 nz = procsnz[i]; 3034 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3035 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3036 } 3037 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3038 } else { 3039 /* receive numeric values */ 3040 ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr); 3041 3042 /* receive message of values*/ 3043 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3044 3045 /* insert into matrix */ 3046 jj = rstart; 3047 smycols = mycols; 3048 svals = vals; 3049 for (i=0; i<m; i++) { 3050 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3051 smycols += ourlens[i]; 3052 svals += ourlens[i]; 3053 jj++; 3054 } 3055 } 3056 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3057 ierr = PetscFree(vals);CHKERRQ(ierr); 3058 ierr = PetscFree(mycols);CHKERRQ(ierr); 3059 ierr = PetscFree(rowners);CHKERRQ(ierr); 3060 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3061 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3062 PetscFunctionReturn(0); 3063 } 3064 3065 #undef __FUNCT__ 3066 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3067 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3068 { 3069 PetscErrorCode ierr; 3070 IS iscol_local; 3071 PetscInt csize; 3072 3073 PetscFunctionBegin; 3074 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3075 if (call == MAT_REUSE_MATRIX) { 3076 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3077 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3078 } else { 3079 PetscInt cbs; 3080 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3081 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3082 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3083 } 3084 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3085 if (call == MAT_INITIAL_MATRIX) { 3086 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3087 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3088 } 3089 PetscFunctionReturn(0); 3090 } 3091 3092 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3093 #undef __FUNCT__ 3094 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3095 /* 3096 Not great since it makes two copies of the submatrix, first an SeqAIJ 3097 in local and then by concatenating the local matrices the end result. 3098 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3099 3100 Note: This requires a sequential iscol with all indices. 3101 */ 3102 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3103 { 3104 PetscErrorCode ierr; 3105 PetscMPIInt rank,size; 3106 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3107 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3108 PetscBool allcolumns, colflag; 3109 Mat M,Mreuse; 3110 MatScalar *vwork,*aa; 3111 MPI_Comm comm; 3112 Mat_SeqAIJ *aij; 3113 3114 PetscFunctionBegin; 3115 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3116 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3117 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3118 3119 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3120 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3121 if (colflag && ncol == mat->cmap->N) { 3122 allcolumns = PETSC_TRUE; 3123 } else { 3124 allcolumns = PETSC_FALSE; 3125 } 3126 if (call == MAT_REUSE_MATRIX) { 3127 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3128 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3129 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3130 } else { 3131 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3132 } 3133 3134 /* 3135 m - number of local rows 3136 n - number of columns (same on all processors) 3137 rstart - first row in new global matrix generated 3138 */ 3139 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3140 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3141 if (call == MAT_INITIAL_MATRIX) { 3142 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3143 ii = aij->i; 3144 jj = aij->j; 3145 3146 /* 3147 Determine the number of non-zeros in the diagonal and off-diagonal 3148 portions of the matrix in order to do correct preallocation 3149 */ 3150 3151 /* first get start and end of "diagonal" columns */ 3152 if (csize == PETSC_DECIDE) { 3153 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3154 if (mglobal == n) { /* square matrix */ 3155 nlocal = m; 3156 } else { 3157 nlocal = n/size + ((n % size) > rank); 3158 } 3159 } else { 3160 nlocal = csize; 3161 } 3162 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3163 rstart = rend - nlocal; 3164 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3165 3166 /* next, compute all the lengths */ 3167 ierr = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr); 3168 olens = dlens + m; 3169 for (i=0; i<m; i++) { 3170 jend = ii[i+1] - ii[i]; 3171 olen = 0; 3172 dlen = 0; 3173 for (j=0; j<jend; j++) { 3174 if (*jj < rstart || *jj >= rend) olen++; 3175 else dlen++; 3176 jj++; 3177 } 3178 olens[i] = olen; 3179 dlens[i] = dlen; 3180 } 3181 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3182 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3183 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3184 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3185 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3186 ierr = PetscFree(dlens);CHKERRQ(ierr); 3187 } else { 3188 PetscInt ml,nl; 3189 3190 M = *newmat; 3191 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3192 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3193 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3194 /* 3195 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3196 rather than the slower MatSetValues(). 3197 */ 3198 M->was_assembled = PETSC_TRUE; 3199 M->assembled = PETSC_FALSE; 3200 } 3201 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3202 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3203 ii = aij->i; 3204 jj = aij->j; 3205 aa = aij->a; 3206 for (i=0; i<m; i++) { 3207 row = rstart + i; 3208 nz = ii[i+1] - ii[i]; 3209 cwork = jj; jj += nz; 3210 vwork = aa; aa += nz; 3211 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3212 } 3213 3214 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3215 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3216 *newmat = M; 3217 3218 /* save submatrix used in processor for next request */ 3219 if (call == MAT_INITIAL_MATRIX) { 3220 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3221 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3222 } 3223 PetscFunctionReturn(0); 3224 } 3225 3226 #undef __FUNCT__ 3227 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3228 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3229 { 3230 PetscInt m,cstart, cend,j,nnz,i,d; 3231 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3232 const PetscInt *JJ; 3233 PetscScalar *values; 3234 PetscErrorCode ierr; 3235 3236 PetscFunctionBegin; 3237 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3238 3239 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3240 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3241 m = B->rmap->n; 3242 cstart = B->cmap->rstart; 3243 cend = B->cmap->rend; 3244 rstart = B->rmap->rstart; 3245 3246 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3247 3248 #if defined(PETSC_USE_DEBUGGING) 3249 for (i=0; i<m; i++) { 3250 nnz = Ii[i+1]- Ii[i]; 3251 JJ = J + Ii[i]; 3252 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3253 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 3254 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3255 } 3256 #endif 3257 3258 for (i=0; i<m; i++) { 3259 nnz = Ii[i+1]- Ii[i]; 3260 JJ = J + Ii[i]; 3261 nnz_max = PetscMax(nnz_max,nnz); 3262 d = 0; 3263 for (j=0; j<nnz; j++) { 3264 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3265 } 3266 d_nnz[i] = d; 3267 o_nnz[i] = nnz - d; 3268 } 3269 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3270 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3271 3272 if (v) values = (PetscScalar*)v; 3273 else { 3274 ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr); 3275 } 3276 3277 for (i=0; i<m; i++) { 3278 ii = i + rstart; 3279 nnz = Ii[i+1]- Ii[i]; 3280 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3281 } 3282 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3283 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3284 3285 if (!v) { 3286 ierr = PetscFree(values);CHKERRQ(ierr); 3287 } 3288 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3289 PetscFunctionReturn(0); 3290 } 3291 3292 #undef __FUNCT__ 3293 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3294 /*@ 3295 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3296 (the default parallel PETSc format). 3297 3298 Collective on MPI_Comm 3299 3300 Input Parameters: 3301 + B - the matrix 3302 . i - the indices into j for the start of each local row (starts with zero) 3303 . j - the column indices for each local row (starts with zero) 3304 - v - optional values in the matrix 3305 3306 Level: developer 3307 3308 Notes: 3309 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3310 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3311 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3312 3313 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3314 3315 The format which is used for the sparse matrix input, is equivalent to a 3316 row-major ordering.. i.e for the following matrix, the input data expected is 3317 as shown: 3318 3319 1 0 0 3320 2 0 3 P0 3321 ------- 3322 4 5 6 P1 3323 3324 Process0 [P0]: rows_owned=[0,1] 3325 i = {0,1,3} [size = nrow+1 = 2+1] 3326 j = {0,0,2} [size = nz = 6] 3327 v = {1,2,3} [size = nz = 6] 3328 3329 Process1 [P1]: rows_owned=[2] 3330 i = {0,3} [size = nrow+1 = 1+1] 3331 j = {0,1,2} [size = nz = 6] 3332 v = {4,5,6} [size = nz = 6] 3333 3334 .keywords: matrix, aij, compressed row, sparse, parallel 3335 3336 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3337 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3338 @*/ 3339 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3340 { 3341 PetscErrorCode ierr; 3342 3343 PetscFunctionBegin; 3344 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3345 PetscFunctionReturn(0); 3346 } 3347 3348 #undef __FUNCT__ 3349 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3350 /*@C 3351 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3352 (the default parallel PETSc format). For good matrix assembly performance 3353 the user should preallocate the matrix storage by setting the parameters 3354 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3355 performance can be increased by more than a factor of 50. 3356 3357 Collective on MPI_Comm 3358 3359 Input Parameters: 3360 + B - the matrix 3361 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3362 (same value is used for all local rows) 3363 . d_nnz - array containing the number of nonzeros in the various rows of the 3364 DIAGONAL portion of the local submatrix (possibly different for each row) 3365 or NULL, if d_nz is used to specify the nonzero structure. 3366 The size of this array is equal to the number of local rows, i.e 'm'. 3367 For matrices that will be factored, you must leave room for (and set) 3368 the diagonal entry even if it is zero. 3369 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3370 submatrix (same value is used for all local rows). 3371 - o_nnz - array containing the number of nonzeros in the various rows of the 3372 OFF-DIAGONAL portion of the local submatrix (possibly different for 3373 each row) or NULL, if o_nz is used to specify the nonzero 3374 structure. The size of this array is equal to the number 3375 of local rows, i.e 'm'. 3376 3377 If the *_nnz parameter is given then the *_nz parameter is ignored 3378 3379 The AIJ format (also called the Yale sparse matrix format or 3380 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3381 storage. The stored row and column indices begin with zero. 3382 See Users-Manual: ch_mat for details. 3383 3384 The parallel matrix is partitioned such that the first m0 rows belong to 3385 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3386 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3387 3388 The DIAGONAL portion of the local submatrix of a processor can be defined 3389 as the submatrix which is obtained by extraction the part corresponding to 3390 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3391 first row that belongs to the processor, r2 is the last row belonging to 3392 the this processor, and c1-c2 is range of indices of the local part of a 3393 vector suitable for applying the matrix to. This is an mxn matrix. In the 3394 common case of a square matrix, the row and column ranges are the same and 3395 the DIAGONAL part is also square. The remaining portion of the local 3396 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3397 3398 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3399 3400 You can call MatGetInfo() to get information on how effective the preallocation was; 3401 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3402 You can also run with the option -info and look for messages with the string 3403 malloc in them to see if additional memory allocation was needed. 3404 3405 Example usage: 3406 3407 Consider the following 8x8 matrix with 34 non-zero values, that is 3408 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3409 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3410 as follows: 3411 3412 .vb 3413 1 2 0 | 0 3 0 | 0 4 3414 Proc0 0 5 6 | 7 0 0 | 8 0 3415 9 0 10 | 11 0 0 | 12 0 3416 ------------------------------------- 3417 13 0 14 | 15 16 17 | 0 0 3418 Proc1 0 18 0 | 19 20 21 | 0 0 3419 0 0 0 | 22 23 0 | 24 0 3420 ------------------------------------- 3421 Proc2 25 26 27 | 0 0 28 | 29 0 3422 30 0 0 | 31 32 33 | 0 34 3423 .ve 3424 3425 This can be represented as a collection of submatrices as: 3426 3427 .vb 3428 A B C 3429 D E F 3430 G H I 3431 .ve 3432 3433 Where the submatrices A,B,C are owned by proc0, D,E,F are 3434 owned by proc1, G,H,I are owned by proc2. 3435 3436 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3437 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3438 The 'M','N' parameters are 8,8, and have the same values on all procs. 3439 3440 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3441 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3442 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3443 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3444 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3445 matrix, ans [DF] as another SeqAIJ matrix. 3446 3447 When d_nz, o_nz parameters are specified, d_nz storage elements are 3448 allocated for every row of the local diagonal submatrix, and o_nz 3449 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3450 One way to choose d_nz and o_nz is to use the max nonzerors per local 3451 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3452 In this case, the values of d_nz,o_nz are: 3453 .vb 3454 proc0 : dnz = 2, o_nz = 2 3455 proc1 : dnz = 3, o_nz = 2 3456 proc2 : dnz = 1, o_nz = 4 3457 .ve 3458 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3459 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3460 for proc3. i.e we are using 12+15+10=37 storage locations to store 3461 34 values. 3462 3463 When d_nnz, o_nnz parameters are specified, the storage is specified 3464 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3465 In the above case the values for d_nnz,o_nnz are: 3466 .vb 3467 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3468 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3469 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3470 .ve 3471 Here the space allocated is sum of all the above values i.e 34, and 3472 hence pre-allocation is perfect. 3473 3474 Level: intermediate 3475 3476 .keywords: matrix, aij, compressed row, sparse, parallel 3477 3478 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 3479 MPIAIJ, MatGetInfo(), PetscSplitOwnership() 3480 @*/ 3481 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3482 { 3483 PetscErrorCode ierr; 3484 3485 PetscFunctionBegin; 3486 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3487 PetscValidType(B,1); 3488 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3489 PetscFunctionReturn(0); 3490 } 3491 3492 #undef __FUNCT__ 3493 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3494 /*@ 3495 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3496 CSR format the local rows. 3497 3498 Collective on MPI_Comm 3499 3500 Input Parameters: 3501 + comm - MPI communicator 3502 . m - number of local rows (Cannot be PETSC_DECIDE) 3503 . n - This value should be the same as the local size used in creating the 3504 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3505 calculated if N is given) For square matrices n is almost always m. 3506 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3507 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3508 . i - row indices 3509 . j - column indices 3510 - a - matrix values 3511 3512 Output Parameter: 3513 . mat - the matrix 3514 3515 Level: intermediate 3516 3517 Notes: 3518 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3519 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3520 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3521 3522 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3523 3524 The format which is used for the sparse matrix input, is equivalent to a 3525 row-major ordering.. i.e for the following matrix, the input data expected is 3526 as shown: 3527 3528 1 0 0 3529 2 0 3 P0 3530 ------- 3531 4 5 6 P1 3532 3533 Process0 [P0]: rows_owned=[0,1] 3534 i = {0,1,3} [size = nrow+1 = 2+1] 3535 j = {0,0,2} [size = nz = 6] 3536 v = {1,2,3} [size = nz = 6] 3537 3538 Process1 [P1]: rows_owned=[2] 3539 i = {0,3} [size = nrow+1 = 1+1] 3540 j = {0,1,2} [size = nz = 6] 3541 v = {4,5,6} [size = nz = 6] 3542 3543 .keywords: matrix, aij, compressed row, sparse, parallel 3544 3545 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3546 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 3547 @*/ 3548 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3549 { 3550 PetscErrorCode ierr; 3551 3552 PetscFunctionBegin; 3553 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3554 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3555 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3556 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3557 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 3558 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3559 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3560 PetscFunctionReturn(0); 3561 } 3562 3563 #undef __FUNCT__ 3564 #define __FUNCT__ "MatCreateAIJ" 3565 /*@C 3566 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 3567 (the default parallel PETSc format). For good matrix assembly performance 3568 the user should preallocate the matrix storage by setting the parameters 3569 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3570 performance can be increased by more than a factor of 50. 3571 3572 Collective on MPI_Comm 3573 3574 Input Parameters: 3575 + comm - MPI communicator 3576 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3577 This value should be the same as the local size used in creating the 3578 y vector for the matrix-vector product y = Ax. 3579 . n - This value should be the same as the local size used in creating the 3580 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3581 calculated if N is given) For square matrices n is almost always m. 3582 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3583 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3584 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3585 (same value is used for all local rows) 3586 . d_nnz - array containing the number of nonzeros in the various rows of the 3587 DIAGONAL portion of the local submatrix (possibly different for each row) 3588 or NULL, if d_nz is used to specify the nonzero structure. 3589 The size of this array is equal to the number of local rows, i.e 'm'. 3590 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3591 submatrix (same value is used for all local rows). 3592 - o_nnz - array containing the number of nonzeros in the various rows of the 3593 OFF-DIAGONAL portion of the local submatrix (possibly different for 3594 each row) or NULL, if o_nz is used to specify the nonzero 3595 structure. The size of this array is equal to the number 3596 of local rows, i.e 'm'. 3597 3598 Output Parameter: 3599 . A - the matrix 3600 3601 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3602 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3603 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3604 3605 Notes: 3606 If the *_nnz parameter is given then the *_nz parameter is ignored 3607 3608 m,n,M,N parameters specify the size of the matrix, and its partitioning across 3609 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3610 storage requirements for this matrix. 3611 3612 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3613 processor than it must be used on all processors that share the object for 3614 that argument. 3615 3616 The user MUST specify either the local or global matrix dimensions 3617 (possibly both). 3618 3619 The parallel matrix is partitioned across processors such that the 3620 first m0 rows belong to process 0, the next m1 rows belong to 3621 process 1, the next m2 rows belong to process 2 etc.. where 3622 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 3623 values corresponding to [m x N] submatrix. 3624 3625 The columns are logically partitioned with the n0 columns belonging 3626 to 0th partition, the next n1 columns belonging to the next 3627 partition etc.. where n0,n1,n2... are the input parameter 'n'. 3628 3629 The DIAGONAL portion of the local submatrix on any given processor 3630 is the submatrix corresponding to the rows and columns m,n 3631 corresponding to the given processor. i.e diagonal matrix on 3632 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 3633 etc. The remaining portion of the local submatrix [m x (N-n)] 3634 constitute the OFF-DIAGONAL portion. The example below better 3635 illustrates this concept. 3636 3637 For a square global matrix we define each processor's diagonal portion 3638 to be its local rows and the corresponding columns (a square submatrix); 3639 each processor's off-diagonal portion encompasses the remainder of the 3640 local matrix (a rectangular submatrix). 3641 3642 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3643 3644 When calling this routine with a single process communicator, a matrix of 3645 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 3646 type of communicator, use the construction mechanism: 3647 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 3648 3649 By default, this format uses inodes (identical nodes) when possible. 3650 We search for consecutive rows with the same nonzero structure, thereby 3651 reusing matrix information to achieve increased efficiency. 3652 3653 Options Database Keys: 3654 + -mat_no_inode - Do not use inodes 3655 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3656 - -mat_aij_oneindex - Internally use indexing starting at 1 3657 rather than 0. Note that when calling MatSetValues(), 3658 the user still MUST index entries starting at 0! 3659 3660 3661 Example usage: 3662 3663 Consider the following 8x8 matrix with 34 non-zero values, that is 3664 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3665 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3666 as follows: 3667 3668 .vb 3669 1 2 0 | 0 3 0 | 0 4 3670 Proc0 0 5 6 | 7 0 0 | 8 0 3671 9 0 10 | 11 0 0 | 12 0 3672 ------------------------------------- 3673 13 0 14 | 15 16 17 | 0 0 3674 Proc1 0 18 0 | 19 20 21 | 0 0 3675 0 0 0 | 22 23 0 | 24 0 3676 ------------------------------------- 3677 Proc2 25 26 27 | 0 0 28 | 29 0 3678 30 0 0 | 31 32 33 | 0 34 3679 .ve 3680 3681 This can be represented as a collection of submatrices as: 3682 3683 .vb 3684 A B C 3685 D E F 3686 G H I 3687 .ve 3688 3689 Where the submatrices A,B,C are owned by proc0, D,E,F are 3690 owned by proc1, G,H,I are owned by proc2. 3691 3692 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3693 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3694 The 'M','N' parameters are 8,8, and have the same values on all procs. 3695 3696 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3697 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3698 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3699 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3700 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3701 matrix, ans [DF] as another SeqAIJ matrix. 3702 3703 When d_nz, o_nz parameters are specified, d_nz storage elements are 3704 allocated for every row of the local diagonal submatrix, and o_nz 3705 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3706 One way to choose d_nz and o_nz is to use the max nonzerors per local 3707 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3708 In this case, the values of d_nz,o_nz are: 3709 .vb 3710 proc0 : dnz = 2, o_nz = 2 3711 proc1 : dnz = 3, o_nz = 2 3712 proc2 : dnz = 1, o_nz = 4 3713 .ve 3714 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3715 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3716 for proc3. i.e we are using 12+15+10=37 storage locations to store 3717 34 values. 3718 3719 When d_nnz, o_nnz parameters are specified, the storage is specified 3720 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3721 In the above case the values for d_nnz,o_nnz are: 3722 .vb 3723 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3724 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3725 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3726 .ve 3727 Here the space allocated is sum of all the above values i.e 34, and 3728 hence pre-allocation is perfect. 3729 3730 Level: intermediate 3731 3732 .keywords: matrix, aij, compressed row, sparse, parallel 3733 3734 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3735 MPIAIJ, MatCreateMPIAIJWithArrays() 3736 @*/ 3737 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3738 { 3739 PetscErrorCode ierr; 3740 PetscMPIInt size; 3741 3742 PetscFunctionBegin; 3743 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3744 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3745 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3746 if (size > 1) { 3747 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3748 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3749 } else { 3750 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3751 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3752 } 3753 PetscFunctionReturn(0); 3754 } 3755 3756 #undef __FUNCT__ 3757 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3758 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 3759 { 3760 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3761 3762 PetscFunctionBegin; 3763 if (Ad) *Ad = a->A; 3764 if (Ao) *Ao = a->B; 3765 if (colmap) *colmap = a->garray; 3766 PetscFunctionReturn(0); 3767 } 3768 3769 #undef __FUNCT__ 3770 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3771 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3772 { 3773 PetscErrorCode ierr; 3774 PetscInt i; 3775 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3776 3777 PetscFunctionBegin; 3778 if (coloring->ctype == IS_COLORING_GLOBAL) { 3779 ISColoringValue *allcolors,*colors; 3780 ISColoring ocoloring; 3781 3782 /* set coloring for diagonal portion */ 3783 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3784 3785 /* set coloring for off-diagonal portion */ 3786 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 3787 ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr); 3788 for (i=0; i<a->B->cmap->n; i++) { 3789 colors[i] = allcolors[a->garray[i]]; 3790 } 3791 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3792 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3793 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3794 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 3795 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3796 ISColoringValue *colors; 3797 PetscInt *larray; 3798 ISColoring ocoloring; 3799 3800 /* set coloring for diagonal portion */ 3801 ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr); 3802 for (i=0; i<a->A->cmap->n; i++) { 3803 larray[i] = i + A->cmap->rstart; 3804 } 3805 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 3806 ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr); 3807 for (i=0; i<a->A->cmap->n; i++) { 3808 colors[i] = coloring->colors[larray[i]]; 3809 } 3810 ierr = PetscFree(larray);CHKERRQ(ierr); 3811 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3812 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3813 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 3814 3815 /* set coloring for off-diagonal portion */ 3816 ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr); 3817 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 3818 ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr); 3819 for (i=0; i<a->B->cmap->n; i++) { 3820 colors[i] = coloring->colors[larray[i]]; 3821 } 3822 ierr = PetscFree(larray);CHKERRQ(ierr); 3823 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3824 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3825 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 3826 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 3827 PetscFunctionReturn(0); 3828 } 3829 3830 #undef __FUNCT__ 3831 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3832 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3833 { 3834 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3835 PetscErrorCode ierr; 3836 3837 PetscFunctionBegin; 3838 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3839 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3840 PetscFunctionReturn(0); 3841 } 3842 3843 #undef __FUNCT__ 3844 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIAIJ" 3845 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3846 { 3847 PetscErrorCode ierr; 3848 PetscInt m,N,i,rstart,nnz,Ii; 3849 PetscInt *indx; 3850 PetscScalar *values; 3851 3852 PetscFunctionBegin; 3853 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3854 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 3855 PetscInt *dnz,*onz,sum,bs,cbs; 3856 3857 if (n == PETSC_DECIDE) { 3858 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3859 } 3860 /* Check sum(n) = N */ 3861 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3862 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 3863 3864 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3865 rstart -= m; 3866 3867 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3868 for (i=0; i<m; i++) { 3869 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 3870 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3871 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 3872 } 3873 3874 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3875 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3876 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3877 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 3878 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3879 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3880 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3881 } 3882 3883 /* numeric phase */ 3884 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 3885 for (i=0; i<m; i++) { 3886 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3887 Ii = i + rstart; 3888 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3889 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3890 } 3891 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3892 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3893 PetscFunctionReturn(0); 3894 } 3895 3896 #undef __FUNCT__ 3897 #define __FUNCT__ "MatFileSplit" 3898 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3899 { 3900 PetscErrorCode ierr; 3901 PetscMPIInt rank; 3902 PetscInt m,N,i,rstart,nnz; 3903 size_t len; 3904 const PetscInt *indx; 3905 PetscViewer out; 3906 char *name; 3907 Mat B; 3908 const PetscScalar *values; 3909 3910 PetscFunctionBegin; 3911 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3912 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3913 /* Should this be the type of the diagonal block of A? */ 3914 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3915 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3916 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 3917 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3918 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 3919 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3920 for (i=0; i<m; i++) { 3921 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3922 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3923 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3924 } 3925 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3926 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3927 3928 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 3929 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3930 ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr); 3931 sprintf(name,"%s.%d",outfile,rank); 3932 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3933 ierr = PetscFree(name);CHKERRQ(ierr); 3934 ierr = MatView(B,out);CHKERRQ(ierr); 3935 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 3936 ierr = MatDestroy(&B);CHKERRQ(ierr); 3937 PetscFunctionReturn(0); 3938 } 3939 3940 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 3941 #undef __FUNCT__ 3942 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3943 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3944 { 3945 PetscErrorCode ierr; 3946 Mat_Merge_SeqsToMPI *merge; 3947 PetscContainer container; 3948 3949 PetscFunctionBegin; 3950 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 3951 if (container) { 3952 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 3953 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3954 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3955 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3956 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3957 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3958 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 3959 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3960 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 3961 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3962 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 3963 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 3964 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 3965 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 3966 ierr = PetscFree(merge);CHKERRQ(ierr); 3967 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 3968 } 3969 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 3970 PetscFunctionReturn(0); 3971 } 3972 3973 #include <../src/mat/utils/freespace.h> 3974 #include <petscbt.h> 3975 3976 #undef __FUNCT__ 3977 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 3978 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 3979 { 3980 PetscErrorCode ierr; 3981 MPI_Comm comm; 3982 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 3983 PetscMPIInt size,rank,taga,*len_s; 3984 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 3985 PetscInt proc,m; 3986 PetscInt **buf_ri,**buf_rj; 3987 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 3988 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 3989 MPI_Request *s_waits,*r_waits; 3990 MPI_Status *status; 3991 MatScalar *aa=a->a; 3992 MatScalar **abuf_r,*ba_i; 3993 Mat_Merge_SeqsToMPI *merge; 3994 PetscContainer container; 3995 3996 PetscFunctionBegin; 3997 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 3998 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3999 4000 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4001 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4002 4003 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4004 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4005 4006 bi = merge->bi; 4007 bj = merge->bj; 4008 buf_ri = merge->buf_ri; 4009 buf_rj = merge->buf_rj; 4010 4011 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4012 owners = merge->rowmap->range; 4013 len_s = merge->len_s; 4014 4015 /* send and recv matrix values */ 4016 /*-----------------------------*/ 4017 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4018 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4019 4020 ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr); 4021 for (proc=0,k=0; proc<size; proc++) { 4022 if (!len_s[proc]) continue; 4023 i = owners[proc]; 4024 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4025 k++; 4026 } 4027 4028 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4029 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4030 ierr = PetscFree(status);CHKERRQ(ierr); 4031 4032 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4033 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4034 4035 /* insert mat values of mpimat */ 4036 /*----------------------------*/ 4037 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4038 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4039 4040 for (k=0; k<merge->nrecv; k++) { 4041 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4042 nrows = *(buf_ri_k[k]); 4043 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4044 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4045 } 4046 4047 /* set values of ba */ 4048 m = merge->rowmap->n; 4049 for (i=0; i<m; i++) { 4050 arow = owners[rank] + i; 4051 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4052 bnzi = bi[i+1] - bi[i]; 4053 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4054 4055 /* add local non-zero vals of this proc's seqmat into ba */ 4056 anzi = ai[arow+1] - ai[arow]; 4057 aj = a->j + ai[arow]; 4058 aa = a->a + ai[arow]; 4059 nextaj = 0; 4060 for (j=0; nextaj<anzi; j++) { 4061 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4062 ba_i[j] += aa[nextaj++]; 4063 } 4064 } 4065 4066 /* add received vals into ba */ 4067 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4068 /* i-th row */ 4069 if (i == *nextrow[k]) { 4070 anzi = *(nextai[k]+1) - *nextai[k]; 4071 aj = buf_rj[k] + *(nextai[k]); 4072 aa = abuf_r[k] + *(nextai[k]); 4073 nextaj = 0; 4074 for (j=0; nextaj<anzi; j++) { 4075 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4076 ba_i[j] += aa[nextaj++]; 4077 } 4078 } 4079 nextrow[k]++; nextai[k]++; 4080 } 4081 } 4082 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4083 } 4084 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4085 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4086 4087 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4088 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4089 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4090 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4091 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4092 PetscFunctionReturn(0); 4093 } 4094 4095 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4096 4097 #undef __FUNCT__ 4098 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4099 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4100 { 4101 PetscErrorCode ierr; 4102 Mat B_mpi; 4103 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4104 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4105 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4106 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4107 PetscInt len,proc,*dnz,*onz,bs,cbs; 4108 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4109 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4110 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4111 MPI_Status *status; 4112 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4113 PetscBT lnkbt; 4114 Mat_Merge_SeqsToMPI *merge; 4115 PetscContainer container; 4116 4117 PetscFunctionBegin; 4118 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4119 4120 /* make sure it is a PETSc comm */ 4121 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4122 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4123 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4124 4125 ierr = PetscNew(&merge);CHKERRQ(ierr); 4126 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4127 4128 /* determine row ownership */ 4129 /*---------------------------------------------------------*/ 4130 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4131 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4132 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4133 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4134 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4135 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4136 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4137 4138 m = merge->rowmap->n; 4139 owners = merge->rowmap->range; 4140 4141 /* determine the number of messages to send, their lengths */ 4142 /*---------------------------------------------------------*/ 4143 len_s = merge->len_s; 4144 4145 len = 0; /* length of buf_si[] */ 4146 merge->nsend = 0; 4147 for (proc=0; proc<size; proc++) { 4148 len_si[proc] = 0; 4149 if (proc == rank) { 4150 len_s[proc] = 0; 4151 } else { 4152 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4153 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4154 } 4155 if (len_s[proc]) { 4156 merge->nsend++; 4157 nrows = 0; 4158 for (i=owners[proc]; i<owners[proc+1]; i++) { 4159 if (ai[i+1] > ai[i]) nrows++; 4160 } 4161 len_si[proc] = 2*(nrows+1); 4162 len += len_si[proc]; 4163 } 4164 } 4165 4166 /* determine the number and length of messages to receive for ij-structure */ 4167 /*-------------------------------------------------------------------------*/ 4168 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4169 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4170 4171 /* post the Irecv of j-structure */ 4172 /*-------------------------------*/ 4173 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4174 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4175 4176 /* post the Isend of j-structure */ 4177 /*--------------------------------*/ 4178 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4179 4180 for (proc=0, k=0; proc<size; proc++) { 4181 if (!len_s[proc]) continue; 4182 i = owners[proc]; 4183 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4184 k++; 4185 } 4186 4187 /* receives and sends of j-structure are complete */ 4188 /*------------------------------------------------*/ 4189 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4190 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4191 4192 /* send and recv i-structure */ 4193 /*---------------------------*/ 4194 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4195 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4196 4197 ierr = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr); 4198 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4199 for (proc=0,k=0; proc<size; proc++) { 4200 if (!len_s[proc]) continue; 4201 /* form outgoing message for i-structure: 4202 buf_si[0]: nrows to be sent 4203 [1:nrows]: row index (global) 4204 [nrows+1:2*nrows+1]: i-structure index 4205 */ 4206 /*-------------------------------------------*/ 4207 nrows = len_si[proc]/2 - 1; 4208 buf_si_i = buf_si + nrows+1; 4209 buf_si[0] = nrows; 4210 buf_si_i[0] = 0; 4211 nrows = 0; 4212 for (i=owners[proc]; i<owners[proc+1]; i++) { 4213 anzi = ai[i+1] - ai[i]; 4214 if (anzi) { 4215 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4216 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4217 nrows++; 4218 } 4219 } 4220 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4221 k++; 4222 buf_si += len_si[proc]; 4223 } 4224 4225 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4226 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4227 4228 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4229 for (i=0; i<merge->nrecv; i++) { 4230 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4231 } 4232 4233 ierr = PetscFree(len_si);CHKERRQ(ierr); 4234 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4235 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4236 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4237 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4238 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4239 ierr = PetscFree(status);CHKERRQ(ierr); 4240 4241 /* compute a local seq matrix in each processor */ 4242 /*----------------------------------------------*/ 4243 /* allocate bi array and free space for accumulating nonzero column info */ 4244 ierr = PetscMalloc1((m+1),&bi);CHKERRQ(ierr); 4245 bi[0] = 0; 4246 4247 /* create and initialize a linked list */ 4248 nlnk = N+1; 4249 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4250 4251 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4252 len = ai[owners[rank+1]] - ai[owners[rank]]; 4253 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4254 4255 current_space = free_space; 4256 4257 /* determine symbolic info for each local row */ 4258 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4259 4260 for (k=0; k<merge->nrecv; k++) { 4261 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4262 nrows = *buf_ri_k[k]; 4263 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4264 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4265 } 4266 4267 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4268 len = 0; 4269 for (i=0; i<m; i++) { 4270 bnzi = 0; 4271 /* add local non-zero cols of this proc's seqmat into lnk */ 4272 arow = owners[rank] + i; 4273 anzi = ai[arow+1] - ai[arow]; 4274 aj = a->j + ai[arow]; 4275 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4276 bnzi += nlnk; 4277 /* add received col data into lnk */ 4278 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4279 if (i == *nextrow[k]) { /* i-th row */ 4280 anzi = *(nextai[k]+1) - *nextai[k]; 4281 aj = buf_rj[k] + *nextai[k]; 4282 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4283 bnzi += nlnk; 4284 nextrow[k]++; nextai[k]++; 4285 } 4286 } 4287 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4288 4289 /* if free space is not available, make more free space */ 4290 if (current_space->local_remaining<bnzi) { 4291 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4292 nspacedouble++; 4293 } 4294 /* copy data into free space, then initialize lnk */ 4295 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4296 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4297 4298 current_space->array += bnzi; 4299 current_space->local_used += bnzi; 4300 current_space->local_remaining -= bnzi; 4301 4302 bi[i+1] = bi[i] + bnzi; 4303 } 4304 4305 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4306 4307 ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr); 4308 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4309 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4310 4311 /* create symbolic parallel matrix B_mpi */ 4312 /*---------------------------------------*/ 4313 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4314 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4315 if (n==PETSC_DECIDE) { 4316 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4317 } else { 4318 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4319 } 4320 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4321 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4322 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4323 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4324 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4325 4326 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4327 B_mpi->assembled = PETSC_FALSE; 4328 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4329 merge->bi = bi; 4330 merge->bj = bj; 4331 merge->buf_ri = buf_ri; 4332 merge->buf_rj = buf_rj; 4333 merge->coi = NULL; 4334 merge->coj = NULL; 4335 merge->owners_co = NULL; 4336 4337 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4338 4339 /* attach the supporting struct to B_mpi for reuse */ 4340 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4341 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4342 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4343 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4344 *mpimat = B_mpi; 4345 4346 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4347 PetscFunctionReturn(0); 4348 } 4349 4350 #undef __FUNCT__ 4351 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4352 /*@C 4353 MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential 4354 matrices from each processor 4355 4356 Collective on MPI_Comm 4357 4358 Input Parameters: 4359 + comm - the communicators the parallel matrix will live on 4360 . seqmat - the input sequential matrices 4361 . m - number of local rows (or PETSC_DECIDE) 4362 . n - number of local columns (or PETSC_DECIDE) 4363 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4364 4365 Output Parameter: 4366 . mpimat - the parallel matrix generated 4367 4368 Level: advanced 4369 4370 Notes: 4371 The dimensions of the sequential matrix in each processor MUST be the same. 4372 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4373 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4374 @*/ 4375 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4376 { 4377 PetscErrorCode ierr; 4378 PetscMPIInt size; 4379 4380 PetscFunctionBegin; 4381 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4382 if (size == 1) { 4383 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4384 if (scall == MAT_INITIAL_MATRIX) { 4385 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4386 } else { 4387 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4388 } 4389 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4390 PetscFunctionReturn(0); 4391 } 4392 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4393 if (scall == MAT_INITIAL_MATRIX) { 4394 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4395 } 4396 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4397 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4398 PetscFunctionReturn(0); 4399 } 4400 4401 #undef __FUNCT__ 4402 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4403 /*@ 4404 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with 4405 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4406 with MatGetSize() 4407 4408 Not Collective 4409 4410 Input Parameters: 4411 + A - the matrix 4412 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4413 4414 Output Parameter: 4415 . A_loc - the local sequential matrix generated 4416 4417 Level: developer 4418 4419 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4420 4421 @*/ 4422 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4423 { 4424 PetscErrorCode ierr; 4425 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4426 Mat_SeqAIJ *mat,*a,*b; 4427 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4428 MatScalar *aa,*ba,*cam; 4429 PetscScalar *ca; 4430 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4431 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4432 PetscBool match; 4433 MPI_Comm comm; 4434 PetscMPIInt size; 4435 4436 PetscFunctionBegin; 4437 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4438 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4439 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4440 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4441 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 4442 4443 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4444 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4445 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4446 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4447 aa = a->a; ba = b->a; 4448 if (scall == MAT_INITIAL_MATRIX) { 4449 if (size == 1) { 4450 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 4451 PetscFunctionReturn(0); 4452 } 4453 4454 ierr = PetscMalloc1((1+am),&ci);CHKERRQ(ierr); 4455 ci[0] = 0; 4456 for (i=0; i<am; i++) { 4457 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4458 } 4459 ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr); 4460 ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr); 4461 k = 0; 4462 for (i=0; i<am; i++) { 4463 ncols_o = bi[i+1] - bi[i]; 4464 ncols_d = ai[i+1] - ai[i]; 4465 /* off-diagonal portion of A */ 4466 for (jo=0; jo<ncols_o; jo++) { 4467 col = cmap[*bj]; 4468 if (col >= cstart) break; 4469 cj[k] = col; bj++; 4470 ca[k++] = *ba++; 4471 } 4472 /* diagonal portion of A */ 4473 for (j=0; j<ncols_d; j++) { 4474 cj[k] = cstart + *aj++; 4475 ca[k++] = *aa++; 4476 } 4477 /* off-diagonal portion of A */ 4478 for (j=jo; j<ncols_o; j++) { 4479 cj[k] = cmap[*bj++]; 4480 ca[k++] = *ba++; 4481 } 4482 } 4483 /* put together the new matrix */ 4484 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4485 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4486 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4487 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4488 mat->free_a = PETSC_TRUE; 4489 mat->free_ij = PETSC_TRUE; 4490 mat->nonew = 0; 4491 } else if (scall == MAT_REUSE_MATRIX) { 4492 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4493 ci = mat->i; cj = mat->j; cam = mat->a; 4494 for (i=0; i<am; i++) { 4495 /* off-diagonal portion of A */ 4496 ncols_o = bi[i+1] - bi[i]; 4497 for (jo=0; jo<ncols_o; jo++) { 4498 col = cmap[*bj]; 4499 if (col >= cstart) break; 4500 *cam++ = *ba++; bj++; 4501 } 4502 /* diagonal portion of A */ 4503 ncols_d = ai[i+1] - ai[i]; 4504 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4505 /* off-diagonal portion of A */ 4506 for (j=jo; j<ncols_o; j++) { 4507 *cam++ = *ba++; bj++; 4508 } 4509 } 4510 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4511 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4512 PetscFunctionReturn(0); 4513 } 4514 4515 #undef __FUNCT__ 4516 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 4517 /*@C 4518 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 4519 4520 Not Collective 4521 4522 Input Parameters: 4523 + A - the matrix 4524 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4525 - row, col - index sets of rows and columns to extract (or NULL) 4526 4527 Output Parameter: 4528 . A_loc - the local sequential matrix generated 4529 4530 Level: developer 4531 4532 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 4533 4534 @*/ 4535 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4536 { 4537 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4538 PetscErrorCode ierr; 4539 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4540 IS isrowa,iscola; 4541 Mat *aloc; 4542 PetscBool match; 4543 4544 PetscFunctionBegin; 4545 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4546 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4547 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4548 if (!row) { 4549 start = A->rmap->rstart; end = A->rmap->rend; 4550 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4551 } else { 4552 isrowa = *row; 4553 } 4554 if (!col) { 4555 start = A->cmap->rstart; 4556 cmap = a->garray; 4557 nzA = a->A->cmap->n; 4558 nzB = a->B->cmap->n; 4559 ierr = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr); 4560 ncols = 0; 4561 for (i=0; i<nzB; i++) { 4562 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4563 else break; 4564 } 4565 imark = i; 4566 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4567 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4568 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 4569 } else { 4570 iscola = *col; 4571 } 4572 if (scall != MAT_INITIAL_MATRIX) { 4573 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4574 aloc[0] = *A_loc; 4575 } 4576 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4577 *A_loc = aloc[0]; 4578 ierr = PetscFree(aloc);CHKERRQ(ierr); 4579 if (!row) { 4580 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 4581 } 4582 if (!col) { 4583 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 4584 } 4585 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4586 PetscFunctionReturn(0); 4587 } 4588 4589 #undef __FUNCT__ 4590 #define __FUNCT__ "MatGetBrowsOfAcols" 4591 /*@C 4592 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 4593 4594 Collective on Mat 4595 4596 Input Parameters: 4597 + A,B - the matrices in mpiaij format 4598 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4599 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 4600 4601 Output Parameter: 4602 + rowb, colb - index sets of rows and columns of B to extract 4603 - B_seq - the sequential matrix generated 4604 4605 Level: developer 4606 4607 @*/ 4608 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 4609 { 4610 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4611 PetscErrorCode ierr; 4612 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4613 IS isrowb,iscolb; 4614 Mat *bseq=NULL; 4615 4616 PetscFunctionBegin; 4617 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 4618 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4619 } 4620 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4621 4622 if (scall == MAT_INITIAL_MATRIX) { 4623 start = A->cmap->rstart; 4624 cmap = a->garray; 4625 nzA = a->A->cmap->n; 4626 nzB = a->B->cmap->n; 4627 ierr = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr); 4628 ncols = 0; 4629 for (i=0; i<nzB; i++) { /* row < local row index */ 4630 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4631 else break; 4632 } 4633 imark = i; 4634 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4635 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4636 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 4637 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 4638 } else { 4639 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4640 isrowb = *rowb; iscolb = *colb; 4641 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 4642 bseq[0] = *B_seq; 4643 } 4644 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4645 *B_seq = bseq[0]; 4646 ierr = PetscFree(bseq);CHKERRQ(ierr); 4647 if (!rowb) { 4648 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 4649 } else { 4650 *rowb = isrowb; 4651 } 4652 if (!colb) { 4653 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 4654 } else { 4655 *colb = iscolb; 4656 } 4657 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4658 PetscFunctionReturn(0); 4659 } 4660 4661 #undef __FUNCT__ 4662 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 4663 /* 4664 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 4665 of the OFF-DIAGONAL portion of local A 4666 4667 Collective on Mat 4668 4669 Input Parameters: 4670 + A,B - the matrices in mpiaij format 4671 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4672 4673 Output Parameter: 4674 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 4675 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 4676 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 4677 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 4678 4679 Level: developer 4680 4681 */ 4682 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 4683 { 4684 VecScatter_MPI_General *gen_to,*gen_from; 4685 PetscErrorCode ierr; 4686 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4687 Mat_SeqAIJ *b_oth; 4688 VecScatter ctx =a->Mvctx; 4689 MPI_Comm comm; 4690 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4691 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4692 PetscScalar *rvalues,*svalues; 4693 MatScalar *b_otha,*bufa,*bufA; 4694 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4695 MPI_Request *rwaits = NULL,*swaits = NULL; 4696 MPI_Status *sstatus,rstatus; 4697 PetscMPIInt jj,size; 4698 PetscInt *cols,sbs,rbs; 4699 PetscScalar *vals; 4700 4701 PetscFunctionBegin; 4702 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4703 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4704 if (size == 1) PetscFunctionReturn(0); 4705 4706 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 4707 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4708 } 4709 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4710 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4711 4712 gen_to = (VecScatter_MPI_General*)ctx->todata; 4713 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4714 rvalues = gen_from->values; /* holds the length of receiving row */ 4715 svalues = gen_to->values; /* holds the length of sending row */ 4716 nrecvs = gen_from->n; 4717 nsends = gen_to->n; 4718 4719 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 4720 srow = gen_to->indices; /* local row index to be sent */ 4721 sstarts = gen_to->starts; 4722 sprocs = gen_to->procs; 4723 sstatus = gen_to->sstatus; 4724 sbs = gen_to->bs; 4725 rstarts = gen_from->starts; 4726 rprocs = gen_from->procs; 4727 rbs = gen_from->bs; 4728 4729 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4730 if (scall == MAT_INITIAL_MATRIX) { 4731 /* i-array */ 4732 /*---------*/ 4733 /* post receives */ 4734 for (i=0; i<nrecvs; i++) { 4735 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4736 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4737 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4738 } 4739 4740 /* pack the outgoing message */ 4741 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 4742 4743 sstartsj[0] = 0; 4744 rstartsj[0] = 0; 4745 len = 0; /* total length of j or a array to be sent */ 4746 k = 0; 4747 for (i=0; i<nsends; i++) { 4748 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4749 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4750 for (j=0; j<nrows; j++) { 4751 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 4752 for (l=0; l<sbs; l++) { 4753 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 4754 4755 rowlen[j*sbs+l] = ncols; 4756 4757 len += ncols; 4758 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 4759 } 4760 k++; 4761 } 4762 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4763 4764 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 4765 } 4766 /* recvs and sends of i-array are completed */ 4767 i = nrecvs; 4768 while (i--) { 4769 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4770 } 4771 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4772 4773 /* allocate buffers for sending j and a arrays */ 4774 ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr); 4775 ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr); 4776 4777 /* create i-array of B_oth */ 4778 ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr); 4779 4780 b_othi[0] = 0; 4781 len = 0; /* total length of j or a array to be received */ 4782 k = 0; 4783 for (i=0; i<nrecvs; i++) { 4784 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4785 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 4786 for (j=0; j<nrows; j++) { 4787 b_othi[k+1] = b_othi[k] + rowlen[j]; 4788 len += rowlen[j]; k++; 4789 } 4790 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4791 } 4792 4793 /* allocate space for j and a arrrays of B_oth */ 4794 ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr); 4795 ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr); 4796 4797 /* j-array */ 4798 /*---------*/ 4799 /* post receives of j-array */ 4800 for (i=0; i<nrecvs; i++) { 4801 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4802 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4803 } 4804 4805 /* pack the outgoing message j-array */ 4806 k = 0; 4807 for (i=0; i<nsends; i++) { 4808 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4809 bufJ = bufj+sstartsj[i]; 4810 for (j=0; j<nrows; j++) { 4811 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4812 for (ll=0; ll<sbs; ll++) { 4813 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 4814 for (l=0; l<ncols; l++) { 4815 *bufJ++ = cols[l]; 4816 } 4817 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 4818 } 4819 } 4820 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4821 } 4822 4823 /* recvs and sends of j-array are completed */ 4824 i = nrecvs; 4825 while (i--) { 4826 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4827 } 4828 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4829 } else if (scall == MAT_REUSE_MATRIX) { 4830 sstartsj = *startsj_s; 4831 rstartsj = *startsj_r; 4832 bufa = *bufa_ptr; 4833 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4834 b_otha = b_oth->a; 4835 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 4836 4837 /* a-array */ 4838 /*---------*/ 4839 /* post receives of a-array */ 4840 for (i=0; i<nrecvs; i++) { 4841 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4842 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4843 } 4844 4845 /* pack the outgoing message a-array */ 4846 k = 0; 4847 for (i=0; i<nsends; i++) { 4848 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4849 bufA = bufa+sstartsj[i]; 4850 for (j=0; j<nrows; j++) { 4851 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4852 for (ll=0; ll<sbs; ll++) { 4853 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 4854 for (l=0; l<ncols; l++) { 4855 *bufA++ = vals[l]; 4856 } 4857 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 4858 } 4859 } 4860 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4861 } 4862 /* recvs and sends of a-array are completed */ 4863 i = nrecvs; 4864 while (i--) { 4865 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4866 } 4867 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4868 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4869 4870 if (scall == MAT_INITIAL_MATRIX) { 4871 /* put together the new matrix */ 4872 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4873 4874 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4875 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4876 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4877 b_oth->free_a = PETSC_TRUE; 4878 b_oth->free_ij = PETSC_TRUE; 4879 b_oth->nonew = 0; 4880 4881 ierr = PetscFree(bufj);CHKERRQ(ierr); 4882 if (!startsj_s || !bufa_ptr) { 4883 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 4884 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 4885 } else { 4886 *startsj_s = sstartsj; 4887 *startsj_r = rstartsj; 4888 *bufa_ptr = bufa; 4889 } 4890 } 4891 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4892 PetscFunctionReturn(0); 4893 } 4894 4895 #undef __FUNCT__ 4896 #define __FUNCT__ "MatGetCommunicationStructs" 4897 /*@C 4898 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4899 4900 Not Collective 4901 4902 Input Parameters: 4903 . A - The matrix in mpiaij format 4904 4905 Output Parameter: 4906 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4907 . colmap - A map from global column index to local index into lvec 4908 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4909 4910 Level: developer 4911 4912 @*/ 4913 #if defined(PETSC_USE_CTABLE) 4914 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4915 #else 4916 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4917 #endif 4918 { 4919 Mat_MPIAIJ *a; 4920 4921 PetscFunctionBegin; 4922 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 4923 PetscValidPointer(lvec, 2); 4924 PetscValidPointer(colmap, 3); 4925 PetscValidPointer(multScatter, 4); 4926 a = (Mat_MPIAIJ*) A->data; 4927 if (lvec) *lvec = a->lvec; 4928 if (colmap) *colmap = a->colmap; 4929 if (multScatter) *multScatter = a->Mvctx; 4930 PetscFunctionReturn(0); 4931 } 4932 4933 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 4934 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 4935 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 4936 #if defined(PETSC_HAVE_ELEMENTAL) 4937 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 4938 #endif 4939 4940 #undef __FUNCT__ 4941 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 4942 /* 4943 Computes (B'*A')' since computing B*A directly is untenable 4944 4945 n p p 4946 ( ) ( ) ( ) 4947 m ( A ) * n ( B ) = m ( C ) 4948 ( ) ( ) ( ) 4949 4950 */ 4951 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 4952 { 4953 PetscErrorCode ierr; 4954 Mat At,Bt,Ct; 4955 4956 PetscFunctionBegin; 4957 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 4958 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 4959 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 4960 ierr = MatDestroy(&At);CHKERRQ(ierr); 4961 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 4962 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 4963 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 4964 PetscFunctionReturn(0); 4965 } 4966 4967 #undef __FUNCT__ 4968 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 4969 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 4970 { 4971 PetscErrorCode ierr; 4972 PetscInt m=A->rmap->n,n=B->cmap->n; 4973 Mat Cmat; 4974 4975 PetscFunctionBegin; 4976 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 4977 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 4978 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4979 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 4980 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 4981 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 4982 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4983 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4984 4985 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 4986 4987 *C = Cmat; 4988 PetscFunctionReturn(0); 4989 } 4990 4991 /* ----------------------------------------------------------------*/ 4992 #undef __FUNCT__ 4993 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 4994 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 4995 { 4996 PetscErrorCode ierr; 4997 4998 PetscFunctionBegin; 4999 if (scall == MAT_INITIAL_MATRIX) { 5000 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5001 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5002 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5003 } 5004 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5005 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5006 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5007 PetscFunctionReturn(0); 5008 } 5009 5010 #if defined(PETSC_HAVE_MUMPS) 5011 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5012 #endif 5013 #if defined(PETSC_HAVE_PASTIX) 5014 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5015 #endif 5016 #if defined(PETSC_HAVE_SUPERLU_DIST) 5017 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5018 #endif 5019 #if defined(PETSC_HAVE_CLIQUE) 5020 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5021 #endif 5022 5023 /*MC 5024 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5025 5026 Options Database Keys: 5027 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5028 5029 Level: beginner 5030 5031 .seealso: MatCreateAIJ() 5032 M*/ 5033 5034 #undef __FUNCT__ 5035 #define __FUNCT__ "MatCreate_MPIAIJ" 5036 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5037 { 5038 Mat_MPIAIJ *b; 5039 PetscErrorCode ierr; 5040 PetscMPIInt size; 5041 5042 PetscFunctionBegin; 5043 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5044 5045 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5046 B->data = (void*)b; 5047 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5048 B->assembled = PETSC_FALSE; 5049 B->insertmode = NOT_SET_VALUES; 5050 b->size = size; 5051 5052 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5053 5054 /* build cache for off array entries formed */ 5055 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5056 5057 b->donotstash = PETSC_FALSE; 5058 b->colmap = 0; 5059 b->garray = 0; 5060 b->roworiented = PETSC_TRUE; 5061 5062 /* stuff used for matrix vector multiply */ 5063 b->lvec = NULL; 5064 b->Mvctx = NULL; 5065 5066 /* stuff for MatGetRow() */ 5067 b->rowindices = 0; 5068 b->rowvalues = 0; 5069 b->getrowactive = PETSC_FALSE; 5070 5071 /* flexible pointer used in CUSP/CUSPARSE classes */ 5072 b->spptr = NULL; 5073 5074 #if defined(PETSC_HAVE_MUMPS) 5075 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr); 5076 #endif 5077 #if defined(PETSC_HAVE_PASTIX) 5078 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5079 #endif 5080 #if defined(PETSC_HAVE_SUPERLU_DIST) 5081 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5082 #endif 5083 #if defined(PETSC_HAVE_CLIQUE) 5084 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr); 5085 #endif 5086 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5087 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5088 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5089 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5090 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5091 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5092 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5093 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5094 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5095 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5096 #if defined(PETSC_HAVE_ELEMENTAL) 5097 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 5098 #endif 5099 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5100 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5101 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5102 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5103 PetscFunctionReturn(0); 5104 } 5105 5106 #undef __FUNCT__ 5107 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5108 /*@ 5109 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5110 and "off-diagonal" part of the matrix in CSR format. 5111 5112 Collective on MPI_Comm 5113 5114 Input Parameters: 5115 + comm - MPI communicator 5116 . m - number of local rows (Cannot be PETSC_DECIDE) 5117 . n - This value should be the same as the local size used in creating the 5118 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5119 calculated if N is given) For square matrices n is almost always m. 5120 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5121 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5122 . i - row indices for "diagonal" portion of matrix 5123 . j - column indices 5124 . a - matrix values 5125 . oi - row indices for "off-diagonal" portion of matrix 5126 . oj - column indices 5127 - oa - matrix values 5128 5129 Output Parameter: 5130 . mat - the matrix 5131 5132 Level: advanced 5133 5134 Notes: 5135 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5136 must free the arrays once the matrix has been destroyed and not before. 5137 5138 The i and j indices are 0 based 5139 5140 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5141 5142 This sets local rows and cannot be used to set off-processor values. 5143 5144 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5145 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5146 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5147 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5148 keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5149 communication if it is known that only local entries will be set. 5150 5151 .keywords: matrix, aij, compressed row, sparse, parallel 5152 5153 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5154 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5155 @*/ 5156 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5157 { 5158 PetscErrorCode ierr; 5159 Mat_MPIAIJ *maij; 5160 5161 PetscFunctionBegin; 5162 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5163 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5164 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5165 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5166 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5167 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5168 maij = (Mat_MPIAIJ*) (*mat)->data; 5169 5170 (*mat)->preallocated = PETSC_TRUE; 5171 5172 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5173 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5174 5175 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5176 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5177 5178 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5179 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5180 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5181 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5182 5183 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5184 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5185 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5186 PetscFunctionReturn(0); 5187 } 5188 5189 /* 5190 Special version for direct calls from Fortran 5191 */ 5192 #include <petsc-private/fortranimpl.h> 5193 5194 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5195 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5196 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5197 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5198 #endif 5199 5200 /* Change these macros so can be used in void function */ 5201 #undef CHKERRQ 5202 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5203 #undef SETERRQ2 5204 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5205 #undef SETERRQ3 5206 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5207 #undef SETERRQ 5208 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5209 5210 #undef __FUNCT__ 5211 #define __FUNCT__ "matsetvaluesmpiaij_" 5212 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5213 { 5214 Mat mat = *mmat; 5215 PetscInt m = *mm, n = *mn; 5216 InsertMode addv = *maddv; 5217 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5218 PetscScalar value; 5219 PetscErrorCode ierr; 5220 5221 MatCheckPreallocated(mat,1); 5222 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5223 5224 #if defined(PETSC_USE_DEBUG) 5225 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5226 #endif 5227 { 5228 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5229 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5230 PetscBool roworiented = aij->roworiented; 5231 5232 /* Some Variables required in the macro */ 5233 Mat A = aij->A; 5234 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5235 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5236 MatScalar *aa = a->a; 5237 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5238 Mat B = aij->B; 5239 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5240 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5241 MatScalar *ba = b->a; 5242 5243 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5244 PetscInt nonew = a->nonew; 5245 MatScalar *ap1,*ap2; 5246 5247 PetscFunctionBegin; 5248 for (i=0; i<m; i++) { 5249 if (im[i] < 0) continue; 5250 #if defined(PETSC_USE_DEBUG) 5251 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5252 #endif 5253 if (im[i] >= rstart && im[i] < rend) { 5254 row = im[i] - rstart; 5255 lastcol1 = -1; 5256 rp1 = aj + ai[row]; 5257 ap1 = aa + ai[row]; 5258 rmax1 = aimax[row]; 5259 nrow1 = ailen[row]; 5260 low1 = 0; 5261 high1 = nrow1; 5262 lastcol2 = -1; 5263 rp2 = bj + bi[row]; 5264 ap2 = ba + bi[row]; 5265 rmax2 = bimax[row]; 5266 nrow2 = bilen[row]; 5267 low2 = 0; 5268 high2 = nrow2; 5269 5270 for (j=0; j<n; j++) { 5271 if (roworiented) value = v[i*n+j]; 5272 else value = v[i+j*m]; 5273 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5274 if (in[j] >= cstart && in[j] < cend) { 5275 col = in[j] - cstart; 5276 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5277 } else if (in[j] < 0) continue; 5278 #if defined(PETSC_USE_DEBUG) 5279 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5280 #endif 5281 else { 5282 if (mat->was_assembled) { 5283 if (!aij->colmap) { 5284 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5285 } 5286 #if defined(PETSC_USE_CTABLE) 5287 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5288 col--; 5289 #else 5290 col = aij->colmap[in[j]] - 1; 5291 #endif 5292 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5293 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5294 col = in[j]; 5295 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5296 B = aij->B; 5297 b = (Mat_SeqAIJ*)B->data; 5298 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5299 rp2 = bj + bi[row]; 5300 ap2 = ba + bi[row]; 5301 rmax2 = bimax[row]; 5302 nrow2 = bilen[row]; 5303 low2 = 0; 5304 high2 = nrow2; 5305 bm = aij->B->rmap->n; 5306 ba = b->a; 5307 } 5308 } else col = in[j]; 5309 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5310 } 5311 } 5312 } else if (!aij->donotstash) { 5313 if (roworiented) { 5314 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5315 } else { 5316 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5317 } 5318 } 5319 } 5320 } 5321 PetscFunctionReturnVoid(); 5322 } 5323 5324