#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc-private/vecimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.
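
   A minimal creation sketch (illustrative only; the preallocation counts
   d_nz and o_nz stand for application-specific estimates of nonzeros per row):
.vb
   MatCreate(comm,&A);
   MatSetSizes(A,m,n,M,N);
   MatSetType(A,MATAIJ);
   MatSeqAIJSetPreallocation(A,d_nz,NULL);            /* used on one process     */
   MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);  /* used on several         */
   /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ... */
.ve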

   Options Database Keys:
.  -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL;
   the type also automatically switches over to use inodes when enough exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.
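
   A small sketch of selecting this format (assuming A is otherwise set up as
   for MATAIJ):
.vb
   MatSetType(A,MATAIJCRL);   /* set in code, or ...          */
   MatSetFromOptions(A);      /* ... run with -mat_type aijcrl */
.ve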

   Options Database Keys:
.  -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalSet_MPIAIJ"
PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*) Y->data;

  PetscFunctionBegin;
  if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
    ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
  } else {
    ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    Used by a preconditioner, hence PETSC_EXTERN
*/
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal entries per row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal entries per row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else { /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each
  processor has an order N integer array) but it is fast to access.
*/
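/*
   For example (illustrative values): if this process's off-diagonal block
   has garray = {3,7,12}, the map created below satisfies colmap[3] = 1,
   colmap[7] = 2 and colmap[12] = 3; the stored indices are shifted by one
   so that a lookup result of 0 means "global column not present locally".
   With PETSC_USE_CTABLE the same pairs live in a hash table, keyed by
   global column + 1.
*/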
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1)  low1 = 0;     \
    else                 high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i]  = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2)  low2 = 0;     \
    else                 high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i]  = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}
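/*
   The two macros above share one insertion kernel: a bisection loop narrows
   the search window inside the (column-sorted) row while it holds more than
   five entries, a linear scan then locates the column, and an existing
   entry is either overwritten or accumulated depending on the InsertMode.
   A genuinely new nonzero goes through MatSeqXAIJReallocateAIJ() and the
   trailing entries of the row are shifted up one slot to make room for it.
*/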
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
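/*
   Routing overview for the routine below: each (row,column) entry goes to
   one of three places. Rows owned by this process land in the diagonal
   block A (columns inside the owned column range) or in the off-diagonal
   block B (all other columns, translated to local numbering through the
   column map); rows owned elsewhere are queued in the matrix stash and are
   communicated during MatAssemblyBegin()/MatAssemblyEnd().
*/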
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some variables required by the macros */
  Mat        A                 = aij->A;
  Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa               = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B                 = aij->B;
  Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba               = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  /* make sure all processors are either in INSERTMODE or ADDMODE */
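  /* The bitwise OR reduction below doubles as a consistency check: taking
     the InsertMode values INSERT_VALUES and ADD_VALUES as distinct bits
     (their enum values are 1 and 2), a result equal to
     ADD_VALUES|INSERT_VALUES can only arise when some process inserted
     while another added, which is then reported as an error. */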
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
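/*
   The routine below first routes the (possibly off-process) row indices to
   their owning processes with a PetscSF reduction and compresses them into
   a local list; it then fixes the right hand side b from the supplied x if
   requested, and zeroes the off-diagonal block B before the diagonal block
   A, since the rectangular diag != 0 case may insert new values into B.
*/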
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ *) A->data;
  PetscInt       *owners = A->rmap->range;
  PetscInt       n = A->rmap->n;
  PetscSF        sf;
  PetscInt       *lrows;
  PetscSFNode    *rrows;
  PetscInt       r, p = 0, len = 0;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);}
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    if (A->nooffproczerorows) {
      if (p != mat->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,mat->rank);
      lrows[len++] = idx - owners[p];
    } else {
      rrows[r].rank  = p;
      rrows[r].index = rows[r] - owners[p];
    }
  }
  if (!A->nooffproczerorows) {
    ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
    /* Collect flags for rows to be zeroed */
    ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
    ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
    /* Compress and put in row numbers */
    for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  }
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }
  /* Must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
    PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,p = 0,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off diagonal part of matrix */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  /* loop over all elements of off process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
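/*
   The multiply is split as y = A_diag*x_local + B_off*x_ghost: the scatter
   gathering the ghost values of x into lvec is started first, the
   diagonal-block product is computed while those messages are in flight,
   and the off-diagonal contribution is added once the scatter completes,
   overlapping communication with computation.
*/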
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* added into yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this no-op */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc1((N-last+first),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
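/*
   The binary format written below is: a four entry header (MAT_FILE_CLASSID,
   number of global rows, number of global columns, total number of
   nonzeros), followed by the row lengths, the global column indices, and
   finally the numerical values; the data of all processes is funneled
   through process 0 under the viewer's flow control to bound the memory
   used there.
*/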
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;
  FILE           *file;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much space as the largest amount needed by any processor */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
    for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
    for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
  PetscFunctionReturn(0);
}

#include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscBool inodes;

      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar  *a;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++;
      a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m    = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++;
      a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1408 Everyone has to call to draw the matrix since the graphics waits are 1409 synchronized across all processors that share the PetscDraw object 1410 */ 1411 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1412 if (!rank) { 1413 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1414 } 1415 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1416 ierr = MatDestroy(&A);CHKERRQ(ierr); 1417 } 1418 PetscFunctionReturn(0); 1419 } 1420 1421 #undef __FUNCT__ 1422 #define __FUNCT__ "MatView_MPIAIJ" 1423 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1424 { 1425 PetscErrorCode ierr; 1426 PetscBool iascii,isdraw,issocket,isbinary; 1427 1428 PetscFunctionBegin; 1429 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1430 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1431 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1432 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1433 if (iascii || isdraw || isbinary || issocket) { 1434 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1435 } 1436 PetscFunctionReturn(0); 1437 } 1438 1439 #undef __FUNCT__ 1440 #define __FUNCT__ "MatSOR_MPIAIJ" 1441 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1442 { 1443 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1444 PetscErrorCode ierr; 1445 Vec bb1 = 0; 1446 PetscBool hasop; 1447 1448 PetscFunctionBegin; 1449 if (flag == SOR_APPLY_UPPER) { 1450 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1451 PetscFunctionReturn(0); 1452 } 1453 1454 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1455 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1456 } 1457 1458 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1459 if (flag & SOR_ZERO_INITIAL_GUESS) { 1460 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1461 its--; 1462 } 1463 1464 while (its--) { 1465 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1466 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1467 1468 /* update rhs: bb1 = bb - B*x */ 1469 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1470 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1471 1472 /* local sweep */ 1473 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1474 } 1475 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1476 if (flag & SOR_ZERO_INITIAL_GUESS) { 1477 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1478 its--; 1479 } 1480 while (its--) { 1481 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1482 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1483 1484 /* update rhs: bb1 = bb - B*x */ 1485 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1486 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1487 1488 /* local sweep */ 1489 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1490 } 1491 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1492 if (flag & SOR_ZERO_INITIAL_GUESS) { 1493 ierr = 
(*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1494 its--; 1495 } 1496 while (its--) { 1497 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1498 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1499 1500 /* update rhs: bb1 = bb - B*x */ 1501 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1502 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1503 1504 /* local sweep */ 1505 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1506 } 1507 } else if (flag & SOR_EISENSTAT) { 1508 Vec xx1; 1509 1510 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1511 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1512 1513 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1514 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1515 if (!mat->diag) { 1516 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1517 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1518 } 1519 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1520 if (hasop) { 1521 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1522 } else { 1523 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1524 } 1525 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1526 1527 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1528 1529 /* local sweep */ 1530 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1531 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1532 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1533 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1534 1535 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1536 PetscFunctionReturn(0); 1537 } 1538 1539 #undef __FUNCT__ 1540 #define __FUNCT__ "MatPermute_MPIAIJ" 1541 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1542 { 1543 Mat aA,aB,Aperm; 1544 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1545 PetscScalar *aa,*ba; 1546 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1547 PetscSF rowsf,sf; 1548 IS parcolp = NULL; 1549 PetscBool done; 1550 PetscErrorCode ierr; 1551 1552 PetscFunctionBegin; 1553 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1554 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1555 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1556 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1557 1558 /* Invert row permutation to find out where my rows should go */ 1559 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1560 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1561 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1562 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1563 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1564 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1565 1566 /* Invert column permutation to find out where my columns should go */ 1567 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1568 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1569 
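/*
   Same inversion trick as for the rows above: each leaf i of this star forest points at the
   root column cwant[i], so reducing the leaf's own global position (work[i] = A->cmap->rstart + i)
   onto the roots with MPIU_REPLACE leaves cdest[j] holding the permuted (destination) index of
   locally owned column j, i.e. the inverse of colp, computed without ever gathering the whole
   permutation onto one process.
*/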
ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1570 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1571 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1572 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1573 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1574 1575 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1576 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1577 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1578 1579 /* Find out where my gcols should go */ 1580 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1581 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1582 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1583 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1584 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1585 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1586 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1587 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1588 1589 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1590 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1591 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1592 for (i=0; i<m; i++) { 1593 PetscInt row = rdest[i],rowner; 1594 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1595 for (j=ai[i]; j<ai[i+1]; j++) { 1596 PetscInt cowner,col = cdest[aj[j]]; 1597 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1598 if (rowner == cowner) dnnz[i]++; 1599 else onnz[i]++; 1600 } 1601 for (j=bi[i]; j<bi[i+1]; j++) { 1602 PetscInt cowner,col = gcdest[bj[j]]; 1603 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1604 if (rowner == cowner) dnnz[i]++; 1605 else onnz[i]++; 1606 } 1607 } 1608 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1609 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1610 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1611 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1612 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1613 1614 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1615 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1616 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1617 for (i=0; i<m; i++) { 1618 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1619 PetscInt j0,rowlen; 1620 rowlen = ai[i+1] - ai[i]; 1621 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1622 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1623 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1624 } 1625 rowlen = bi[i+1] - bi[i]; 1626 for (j0=j=0; j<rowlen; j0=j) { 1627 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1628 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1629 } 1630 } 1631 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1632 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1633 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1634 ierr = 
MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1635 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1636 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1637 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1638 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1639 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1640 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1641 *B = Aperm; 1642 PetscFunctionReturn(0); 1643 } 1644 1645 #undef __FUNCT__ 1646 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1647 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1648 { 1649 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1650 Mat A = mat->A,B = mat->B; 1651 PetscErrorCode ierr; 1652 PetscReal isend[5],irecv[5]; 1653 1654 PetscFunctionBegin; 1655 info->block_size = 1.0; 1656 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1657 1658 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1659 isend[3] = info->memory; isend[4] = info->mallocs; 1660 1661 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1662 1663 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1664 isend[3] += info->memory; isend[4] += info->mallocs; 1665 if (flag == MAT_LOCAL) { 1666 info->nz_used = isend[0]; 1667 info->nz_allocated = isend[1]; 1668 info->nz_unneeded = isend[2]; 1669 info->memory = isend[3]; 1670 info->mallocs = isend[4]; 1671 } else if (flag == MAT_GLOBAL_MAX) { 1672 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1673 1674 info->nz_used = irecv[0]; 1675 info->nz_allocated = irecv[1]; 1676 info->nz_unneeded = irecv[2]; 1677 info->memory = irecv[3]; 1678 info->mallocs = irecv[4]; 1679 } else if (flag == MAT_GLOBAL_SUM) { 1680 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1681 1682 info->nz_used = irecv[0]; 1683 info->nz_allocated = irecv[1]; 1684 info->nz_unneeded = irecv[2]; 1685 info->memory = irecv[3]; 1686 info->mallocs = irecv[4]; 1687 } 1688 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1689 info->fill_ratio_needed = 0; 1690 info->factor_mallocs = 0; 1691 PetscFunctionReturn(0); 1692 } 1693 1694 #undef __FUNCT__ 1695 #define __FUNCT__ "MatSetOption_MPIAIJ" 1696 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1697 { 1698 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1699 PetscErrorCode ierr; 1700 1701 PetscFunctionBegin; 1702 switch (op) { 1703 case MAT_NEW_NONZERO_LOCATIONS: 1704 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1705 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1706 case MAT_KEEP_NONZERO_PATTERN: 1707 case MAT_NEW_NONZERO_LOCATION_ERR: 1708 case MAT_USE_INODES: 1709 case MAT_IGNORE_ZERO_ENTRIES: 1710 MatCheckPreallocated(A,1); 1711 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1712 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1713 break; 1714 case MAT_ROW_ORIENTED: 1715 a->roworiented = flg; 1716 1717 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1718 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1719 break; 1720 case MAT_NEW_DIAGONALS: 1721 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1722 break; 1723 case MAT_IGNORE_OFF_PROC_ENTRIES: 1724 a->donotstash = flg; 1725 break; 1726 case MAT_SPD: 1727 A->spd_set = PETSC_TRUE; 1728 A->spd = flg; 1729 if (flg) { 1730 A->symmetric = PETSC_TRUE; 1731 A->structurally_symmetric = PETSC_TRUE; 1732 A->symmetric_set = PETSC_TRUE; 1733 A->structurally_symmetric_set = 
PETSC_TRUE; 1734 } 1735 break; 1736 case MAT_SYMMETRIC: 1737 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1738 break; 1739 case MAT_STRUCTURALLY_SYMMETRIC: 1740 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1741 break; 1742 case MAT_HERMITIAN: 1743 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1744 break; 1745 case MAT_SYMMETRY_ETERNAL: 1746 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1747 break; 1748 default: 1749 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1750 } 1751 PetscFunctionReturn(0); 1752 } 1753 1754 #undef __FUNCT__ 1755 #define __FUNCT__ "MatGetRow_MPIAIJ" 1756 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1757 { 1758 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1759 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1760 PetscErrorCode ierr; 1761 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1762 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1763 PetscInt *cmap,*idx_p; 1764 1765 PetscFunctionBegin; 1766 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1767 mat->getrowactive = PETSC_TRUE; 1768 1769 if (!mat->rowvalues && (idx || v)) { 1770 /* 1771 allocate enough space to hold information from the longest row. 1772 */ 1773 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1774 PetscInt max = 1,tmp; 1775 for (i=0; i<matin->rmap->n; i++) { 1776 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1777 if (max < tmp) max = tmp; 1778 } 1779 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1780 } 1781 1782 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1783 lrow = row - rstart; 1784 1785 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1786 if (!v) {pvA = 0; pvB = 0;} 1787 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1788 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1789 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1790 nztot = nzA + nzB; 1791 1792 cmap = mat->garray; 1793 if (v || idx) { 1794 if (nztot) { 1795 /* Sort by increasing column numbers, assuming A and B already sorted */ 1796 PetscInt imark = -1; 1797 if (v) { 1798 *v = v_p = mat->rowvalues; 1799 for (i=0; i<nzB; i++) { 1800 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1801 else break; 1802 } 1803 imark = i; 1804 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1805 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1806 } 1807 if (idx) { 1808 *idx = idx_p = mat->rowindices; 1809 if (imark > -1) { 1810 for (i=0; i<imark; i++) { 1811 idx_p[i] = cmap[cworkB[i]]; 1812 } 1813 } else { 1814 for (i=0; i<nzB; i++) { 1815 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1816 else break; 1817 } 1818 imark = i; 1819 } 1820 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1821 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1822 } 1823 } else { 1824 if (idx) *idx = 0; 1825 if (v) *v = 0; 1826 } 1827 } 1828 *nz = nztot; 1829 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1830 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1831 PetscFunctionReturn(0); 1832 } 1833 1834 #undef __FUNCT__ 1835 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1836 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1837 { 1838 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1839 1840 
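/*
   MatGetRow_MPIAIJ() above hands the caller pointers into the Mat_MPIAIJ work arrays
   rowvalues/rowindices (allocated once, sized for the longest local row), so restoring a row
   only has to clear the getrowactive flag below; the buffers are kept and reused by subsequent
   MatGetRow() calls on this matrix.
*/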
PetscFunctionBegin; 1841 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1842 aij->getrowactive = PETSC_FALSE; 1843 PetscFunctionReturn(0); 1844 } 1845 1846 #undef __FUNCT__ 1847 #define __FUNCT__ "MatNorm_MPIAIJ" 1848 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1849 { 1850 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1851 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1852 PetscErrorCode ierr; 1853 PetscInt i,j,cstart = mat->cmap->rstart; 1854 PetscReal sum = 0.0; 1855 MatScalar *v; 1856 1857 PetscFunctionBegin; 1858 if (aij->size == 1) { 1859 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1860 } else { 1861 if (type == NORM_FROBENIUS) { 1862 v = amat->a; 1863 for (i=0; i<amat->nz; i++) { 1864 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1865 } 1866 v = bmat->a; 1867 for (i=0; i<bmat->nz; i++) { 1868 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1869 } 1870 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1871 *norm = PetscSqrtReal(*norm); 1872 } else if (type == NORM_1) { /* max column norm */ 1873 PetscReal *tmp,*tmp2; 1874 PetscInt *jj,*garray = aij->garray; 1875 ierr = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr); 1876 ierr = PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr); 1877 *norm = 0.0; 1878 v = amat->a; jj = amat->j; 1879 for (j=0; j<amat->nz; j++) { 1880 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1881 } 1882 v = bmat->a; jj = bmat->j; 1883 for (j=0; j<bmat->nz; j++) { 1884 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1885 } 1886 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1887 for (j=0; j<mat->cmap->N; j++) { 1888 if (tmp2[j] > *norm) *norm = tmp2[j]; 1889 } 1890 ierr = PetscFree(tmp);CHKERRQ(ierr); 1891 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1892 } else if (type == NORM_INFINITY) { /* max row norm */ 1893 PetscReal ntemp = 0.0; 1894 for (j=0; j<aij->A->rmap->n; j++) { 1895 v = amat->a + amat->i[j]; 1896 sum = 0.0; 1897 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1898 sum += PetscAbsScalar(*v); v++; 1899 } 1900 v = bmat->a + bmat->i[j]; 1901 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1902 sum += PetscAbsScalar(*v); v++; 1903 } 1904 if (sum > ntemp) ntemp = sum; 1905 } 1906 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1907 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1908 } 1909 PetscFunctionReturn(0); 1910 } 1911 1912 #undef __FUNCT__ 1913 #define __FUNCT__ "MatTranspose_MPIAIJ" 1914 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1915 { 1916 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1917 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1918 PetscErrorCode ierr; 1919 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1920 PetscInt cstart = A->cmap->rstart,ncol; 1921 Mat B; 1922 MatScalar *array; 1923 1924 PetscFunctionBegin; 1925 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1926 1927 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1928 ai = Aloc->i; aj = Aloc->j; 1929 bi = Bloc->i; bj = Bloc->j; 1930 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1931 PetscInt *d_nnz,*g_nnz,*o_nnz; 1932 PetscSFNode *oloc; 1933 
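/*
     Preallocation for the transpose: every occurrence of local column j in the diagonal block
     contributes an entry to local row j of the result, so d_nnz[] is simply a histogram of
     Aloc->j. Off-diagonal entries are histogrammed per compressed column into g_nnz[] and then
     summed onto the processes that own those global columns via a PetscSF reduction, which
     yields o_nnz[] without any hand-written MPI communication.
*/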
PETSC_UNUSED PetscSF sf; 1934 1935 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1936 /* compute d_nnz for preallocation */ 1937 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1938 for (i=0; i<ai[ma]; i++) { 1939 d_nnz[aj[i]]++; 1940 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1941 } 1942 /* compute local off-diagonal contributions */ 1943 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1944 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1945 /* map those to global */ 1946 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1947 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1948 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1949 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1950 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1951 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1952 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1953 1954 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1955 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1956 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1957 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1958 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1959 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 1960 } else { 1961 B = *matout; 1962 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1963 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1964 } 1965 1966 /* copy over the A part */ 1967 array = Aloc->a; 1968 row = A->rmap->rstart; 1969 for (i=0; i<ma; i++) { 1970 ncol = ai[i+1]-ai[i]; 1971 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1972 row++; 1973 array += ncol; aj += ncol; 1974 } 1975 aj = Aloc->j; 1976 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1977 1978 /* copy over the B part */ 1979 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 1980 array = Bloc->a; 1981 row = A->rmap->rstart; 1982 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1983 cols_tmp = cols; 1984 for (i=0; i<mb; i++) { 1985 ncol = bi[i+1]-bi[i]; 1986 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1987 row++; 1988 array += ncol; cols_tmp += ncol; 1989 } 1990 ierr = PetscFree(cols);CHKERRQ(ierr); 1991 1992 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1993 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1994 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1995 *matout = B; 1996 } else { 1997 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1998 } 1999 PetscFunctionReturn(0); 2000 } 2001 2002 #undef __FUNCT__ 2003 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2004 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2005 { 2006 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2007 Mat a = aij->A,b = aij->B; 2008 PetscErrorCode ierr; 2009 PetscInt s1,s2,s3; 2010 2011 PetscFunctionBegin; 2012 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2013 if (rr) { 2014 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2015 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2016 /* Overlap communication with computation. 
*/ 2017 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2018 } 2019 if (ll) { 2020 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2021 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2022 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2023 } 2024 /* scale the diagonal block */ 2025 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2026 2027 if (rr) { 2028 /* Do a scatter end and then right scale the off-diagonal block */ 2029 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2030 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2031 } 2032 PetscFunctionReturn(0); 2033 } 2034 2035 #undef __FUNCT__ 2036 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2037 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2038 { 2039 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2040 PetscErrorCode ierr; 2041 2042 PetscFunctionBegin; 2043 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2044 PetscFunctionReturn(0); 2045 } 2046 2047 #undef __FUNCT__ 2048 #define __FUNCT__ "MatEqual_MPIAIJ" 2049 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2050 { 2051 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2052 Mat a,b,c,d; 2053 PetscBool flg; 2054 PetscErrorCode ierr; 2055 2056 PetscFunctionBegin; 2057 a = matA->A; b = matA->B; 2058 c = matB->A; d = matB->B; 2059 2060 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2061 if (flg) { 2062 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2063 } 2064 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2065 PetscFunctionReturn(0); 2066 } 2067 2068 #undef __FUNCT__ 2069 #define __FUNCT__ "MatCopy_MPIAIJ" 2070 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2071 { 2072 PetscErrorCode ierr; 2073 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2074 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2075 2076 PetscFunctionBegin; 2077 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2078 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2079 /* because of the column compression in the off-processor part of the matrix a->B, 2080 the number of columns in a->B and b->B may be different, hence we cannot call 2081 the MatCopy() directly on the two parts. If need be, we can provide a more 2082 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2083 then copying the submatrices */ 2084 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2085 } else { 2086 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2087 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2088 } 2089 PetscFunctionReturn(0); 2090 } 2091 2092 #undef __FUNCT__ 2093 #define __FUNCT__ "MatSetUp_MPIAIJ" 2094 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2095 { 2096 PetscErrorCode ierr; 2097 2098 PetscFunctionBegin; 2099 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2100 PetscFunctionReturn(0); 2101 } 2102 2103 /* 2104 Computes the number of nonzeros per row needed for preallocation when X and Y 2105 have different nonzero structure. 
2106 */ 2107 #undef __FUNCT__ 2108 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private" 2109 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2110 { 2111 PetscInt i,j,k,nzx,nzy; 2112 2113 PetscFunctionBegin; 2114 /* Set the number of nonzeros in the new matrix */ 2115 for (i=0; i<m; i++) { 2116 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2117 nzx = xi[i+1] - xi[i]; 2118 nzy = yi[i+1] - yi[i]; 2119 nnz[i] = 0; 2120 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2121 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2122 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2123 nnz[i]++; 2124 } 2125 for (; k<nzy; k++) nnz[i]++; 2126 } 2127 PetscFunctionReturn(0); 2128 } 2129 2130 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2131 #undef __FUNCT__ 2132 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2133 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2134 { 2135 PetscErrorCode ierr; 2136 PetscInt m = Y->rmap->N; 2137 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2138 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2139 2140 PetscFunctionBegin; 2141 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2142 PetscFunctionReturn(0); 2143 } 2144 2145 #undef __FUNCT__ 2146 #define __FUNCT__ "MatAXPY_MPIAIJ" 2147 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2148 { 2149 PetscErrorCode ierr; 2150 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2151 PetscBLASInt bnz,one=1; 2152 Mat_SeqAIJ *x,*y; 2153 2154 PetscFunctionBegin; 2155 if (str == SAME_NONZERO_PATTERN) { 2156 PetscScalar alpha = a; 2157 x = (Mat_SeqAIJ*)xx->A->data; 2158 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2159 y = (Mat_SeqAIJ*)yy->A->data; 2160 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2161 x = (Mat_SeqAIJ*)xx->B->data; 2162 y = (Mat_SeqAIJ*)yy->B->data; 2163 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2164 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2165 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2166 } else { 2167 Mat B; 2168 PetscInt *nnz_d,*nnz_o; 2169 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2170 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2171 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2172 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2173 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2174 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2175 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2176 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2177 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2178 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2179 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2180 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2181 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2182 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2183 } 2184 PetscFunctionReturn(0); 2185 } 2186 2187 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2188 2189 #undef __FUNCT__ 2190 #define __FUNCT__ 
"MatConjugate_MPIAIJ" 2191 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2192 { 2193 #if defined(PETSC_USE_COMPLEX) 2194 PetscErrorCode ierr; 2195 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2196 2197 PetscFunctionBegin; 2198 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2199 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2200 #else 2201 PetscFunctionBegin; 2202 #endif 2203 PetscFunctionReturn(0); 2204 } 2205 2206 #undef __FUNCT__ 2207 #define __FUNCT__ "MatRealPart_MPIAIJ" 2208 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2209 { 2210 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2211 PetscErrorCode ierr; 2212 2213 PetscFunctionBegin; 2214 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2215 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2216 PetscFunctionReturn(0); 2217 } 2218 2219 #undef __FUNCT__ 2220 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2221 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2222 { 2223 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2224 PetscErrorCode ierr; 2225 2226 PetscFunctionBegin; 2227 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2228 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2229 PetscFunctionReturn(0); 2230 } 2231 2232 #if defined(PETSC_HAVE_PBGL) 2233 2234 #include <boost/parallel/mpi/bsp_process_group.hpp> 2235 #include <boost/graph/distributed/ilu_default_graph.hpp> 2236 #include <boost/graph/distributed/ilu_0_block.hpp> 2237 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2238 #include <boost/graph/distributed/petsc/interface.hpp> 2239 #include <boost/multi_array.hpp> 2240 #include <boost/parallel/distributed_property_map->hpp> 2241 2242 #undef __FUNCT__ 2243 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2244 /* 2245 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2246 */ 2247 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2248 { 2249 namespace petsc = boost::distributed::petsc; 2250 2251 namespace graph_dist = boost::graph::distributed; 2252 using boost::graph::distributed::ilu_default::process_group_type; 2253 using boost::graph::ilu_permuted; 2254 2255 PetscBool row_identity, col_identity; 2256 PetscContainer c; 2257 PetscInt m, n, M, N; 2258 PetscErrorCode ierr; 2259 2260 PetscFunctionBegin; 2261 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2262 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2263 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2264 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2265 2266 process_group_type pg; 2267 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2268 lgraph_type *lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2269 lgraph_type& level_graph = *lgraph_p; 2270 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2271 2272 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2273 ilu_permuted(level_graph); 2274 2275 /* put together the new matrix */ 2276 ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr); 2277 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2278 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2279 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2280 ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr); 2281 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2282 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 
2283 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2284 2285 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c); 2286 ierr = PetscContainerSetPointer(c, lgraph_p); 2287 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2288 ierr = PetscContainerDestroy(&c); 2289 PetscFunctionReturn(0); 2290 } 2291 2292 #undef __FUNCT__ 2293 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2294 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2295 { 2296 PetscFunctionBegin; 2297 PetscFunctionReturn(0); 2298 } 2299 2300 #undef __FUNCT__ 2301 #define __FUNCT__ "MatSolve_MPIAIJ" 2302 /* 2303 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2304 */ 2305 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2306 { 2307 namespace graph_dist = boost::graph::distributed; 2308 2309 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2310 lgraph_type *lgraph_p; 2311 PetscContainer c; 2312 PetscErrorCode ierr; 2313 2314 PetscFunctionBegin; 2315 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr); 2316 ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr); 2317 ierr = VecCopy(b, x);CHKERRQ(ierr); 2318 2319 PetscScalar *array_x; 2320 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2321 PetscInt sx; 2322 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2323 2324 PetscScalar *array_b; 2325 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2326 PetscInt sb; 2327 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2328 2329 lgraph_type& level_graph = *lgraph_p; 2330 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2331 2332 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2333 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]); 2334 array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]); 2335 2336 typedef boost::iterator_property_map<array_ref_type::iterator, 2337 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2338 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)); 2339 gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2340 2341 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2342 PetscFunctionReturn(0); 2343 } 2344 #endif 2345 2346 #undef __FUNCT__ 2347 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2348 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2349 { 2350 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2351 PetscErrorCode ierr; 2352 PetscInt i,*idxb = 0; 2353 PetscScalar *va,*vb; 2354 Vec vtmp; 2355 2356 PetscFunctionBegin; 2357 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2358 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2359 if (idx) { 2360 for (i=0; i<A->rmap->n; i++) { 2361 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2362 } 2363 } 2364 2365 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2366 if (idx) { 2367 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2368 } 2369 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2370 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2371 2372 for (i=0; i<A->rmap->n; i++) { 2373 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2374 va[i] = vb[i]; 2375 if (idx) idx[i] = a->garray[idxb[i]]; 2376 } 2377 } 2378 2379 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2380 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2381 ierr = PetscFree(idxb);CHKERRQ(ierr); 2382 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2383 PetscFunctionReturn(0); 2384 } 
2385 2386 #undef __FUNCT__ 2387 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2388 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2389 { 2390 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2391 PetscErrorCode ierr; 2392 PetscInt i,*idxb = 0; 2393 PetscScalar *va,*vb; 2394 Vec vtmp; 2395 2396 PetscFunctionBegin; 2397 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2398 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2399 if (idx) { 2400 for (i=0; i<A->rmap->n; i++) { 2401 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2402 } 2403 } 2404 2405 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2406 if (idx) { 2407 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2408 } 2409 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2410 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2411 2412 for (i=0; i<A->rmap->n; i++) { 2413 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2414 va[i] = vb[i]; 2415 if (idx) idx[i] = a->garray[idxb[i]]; 2416 } 2417 } 2418 2419 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2420 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2421 ierr = PetscFree(idxb);CHKERRQ(ierr); 2422 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2423 PetscFunctionReturn(0); 2424 } 2425 2426 #undef __FUNCT__ 2427 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2428 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2429 { 2430 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2431 PetscInt n = A->rmap->n; 2432 PetscInt cstart = A->cmap->rstart; 2433 PetscInt *cmap = mat->garray; 2434 PetscInt *diagIdx, *offdiagIdx; 2435 Vec diagV, offdiagV; 2436 PetscScalar *a, *diagA, *offdiagA; 2437 PetscInt r; 2438 PetscErrorCode ierr; 2439 2440 PetscFunctionBegin; 2441 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2442 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2443 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2444 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2445 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2446 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2447 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2448 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2449 for (r = 0; r < n; ++r) { 2450 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2451 a[r] = diagA[r]; 2452 idx[r] = cstart + diagIdx[r]; 2453 } else { 2454 a[r] = offdiagA[r]; 2455 idx[r] = cmap[offdiagIdx[r]]; 2456 } 2457 } 2458 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2459 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2460 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2461 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2462 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2463 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2464 PetscFunctionReturn(0); 2465 } 2466 2467 #undef __FUNCT__ 2468 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2469 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2470 { 2471 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2472 PetscInt n = A->rmap->n; 2473 PetscInt cstart = A->cmap->rstart; 2474 PetscInt *cmap = mat->garray; 2475 PetscInt *diagIdx, *offdiagIdx; 2476 Vec diagV, offdiagV; 2477 PetscScalar *a, *diagA, *offdiagA; 2478 PetscInt r; 2479 PetscErrorCode ierr; 2480 2481 PetscFunctionBegin; 2482 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2483 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2484 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2485 ierr = MatGetRowMax(mat->A,
diagV, diagIdx);CHKERRQ(ierr); 2486 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2487 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2488 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2489 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2490 for (r = 0; r < n; ++r) { 2491 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2492 a[r] = diagA[r]; 2493 idx[r] = cstart + diagIdx[r]; 2494 } else { 2495 a[r] = offdiagA[r]; 2496 idx[r] = cmap[offdiagIdx[r]]; 2497 } 2498 } 2499 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2500 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2501 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2502 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2503 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2504 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2505 PetscFunctionReturn(0); 2506 } 2507 2508 #undef __FUNCT__ 2509 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 2510 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2511 { 2512 PetscErrorCode ierr; 2513 Mat *dummy; 2514 2515 PetscFunctionBegin; 2516 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2517 *newmat = *dummy; 2518 ierr = PetscFree(dummy);CHKERRQ(ierr); 2519 PetscFunctionReturn(0); 2520 } 2521 2522 #undef __FUNCT__ 2523 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 2524 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2525 { 2526 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2527 PetscErrorCode ierr; 2528 2529 PetscFunctionBegin; 2530 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2531 PetscFunctionReturn(0); 2532 } 2533 2534 #undef __FUNCT__ 2535 #define __FUNCT__ "MatSetRandom_MPIAIJ" 2536 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2537 { 2538 PetscErrorCode ierr; 2539 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2540 2541 PetscFunctionBegin; 2542 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2543 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2544 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2545 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2546 PetscFunctionReturn(0); 2547 } 2548 2549 /* -------------------------------------------------------------------*/ 2550 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2551 MatGetRow_MPIAIJ, 2552 MatRestoreRow_MPIAIJ, 2553 MatMult_MPIAIJ, 2554 /* 4*/ MatMultAdd_MPIAIJ, 2555 MatMultTranspose_MPIAIJ, 2556 MatMultTransposeAdd_MPIAIJ, 2557 #if defined(PETSC_HAVE_PBGL) 2558 MatSolve_MPIAIJ, 2559 #else 2560 0, 2561 #endif 2562 0, 2563 0, 2564 /*10*/ 0, 2565 0, 2566 0, 2567 MatSOR_MPIAIJ, 2568 MatTranspose_MPIAIJ, 2569 /*15*/ MatGetInfo_MPIAIJ, 2570 MatEqual_MPIAIJ, 2571 MatGetDiagonal_MPIAIJ, 2572 MatDiagonalScale_MPIAIJ, 2573 MatNorm_MPIAIJ, 2574 /*20*/ MatAssemblyBegin_MPIAIJ, 2575 MatAssemblyEnd_MPIAIJ, 2576 MatSetOption_MPIAIJ, 2577 MatZeroEntries_MPIAIJ, 2578 /*24*/ MatZeroRows_MPIAIJ, 2579 0, 2580 #if defined(PETSC_HAVE_PBGL) 2581 0, 2582 #else 2583 0, 2584 #endif 2585 0, 2586 0, 2587 /*29*/ MatSetUp_MPIAIJ, 2588 #if defined(PETSC_HAVE_PBGL) 2589 0, 2590 #else 2591 0, 2592 #endif 2593 0, 2594 0, 2595 0, 2596 /*34*/ MatDuplicate_MPIAIJ, 2597 0, 2598 0, 2599 0, 2600 0, 2601 /*39*/ MatAXPY_MPIAIJ, 2602 MatGetSubMatrices_MPIAIJ, 2603 MatIncreaseOverlap_MPIAIJ, 2604 MatGetValues_MPIAIJ, 2605 MatCopy_MPIAIJ, 2606 /*44*/ MatGetRowMax_MPIAIJ, 2607 MatScale_MPIAIJ, 2608 0, 2609 MatDiagonalSet_MPIAIJ, 2610 MatZeroRowsColumns_MPIAIJ, 2611 /*49*/ 
MatSetRandom_MPIAIJ, 2612 0, 2613 0, 2614 0, 2615 0, 2616 /*54*/ MatFDColoringCreate_MPIXAIJ, 2617 0, 2618 MatSetUnfactored_MPIAIJ, 2619 MatPermute_MPIAIJ, 2620 0, 2621 /*59*/ MatGetSubMatrix_MPIAIJ, 2622 MatDestroy_MPIAIJ, 2623 MatView_MPIAIJ, 2624 0, 2625 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2626 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2627 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2628 0, 2629 0, 2630 0, 2631 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2632 MatGetRowMinAbs_MPIAIJ, 2633 0, 2634 MatSetColoring_MPIAIJ, 2635 0, 2636 MatSetValuesAdifor_MPIAIJ, 2637 /*75*/ MatFDColoringApply_AIJ, 2638 0, 2639 0, 2640 0, 2641 MatFindZeroDiagonals_MPIAIJ, 2642 /*80*/ 0, 2643 0, 2644 0, 2645 /*83*/ MatLoad_MPIAIJ, 2646 0, 2647 0, 2648 0, 2649 0, 2650 0, 2651 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2652 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2653 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2654 MatPtAP_MPIAIJ_MPIAIJ, 2655 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2656 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2657 0, 2658 0, 2659 0, 2660 0, 2661 /*99*/ 0, 2662 0, 2663 0, 2664 MatConjugate_MPIAIJ, 2665 0, 2666 /*104*/MatSetValuesRow_MPIAIJ, 2667 MatRealPart_MPIAIJ, 2668 MatImaginaryPart_MPIAIJ, 2669 0, 2670 0, 2671 /*109*/0, 2672 0, 2673 MatGetRowMin_MPIAIJ, 2674 0, 2675 0, 2676 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2677 0, 2678 0, 2679 0, 2680 0, 2681 /*119*/0, 2682 0, 2683 0, 2684 0, 2685 MatGetMultiProcBlock_MPIAIJ, 2686 /*124*/MatFindNonzeroRows_MPIAIJ, 2687 MatGetColumnNorms_MPIAIJ, 2688 MatInvertBlockDiagonal_MPIAIJ, 2689 0, 2690 MatGetSubMatricesParallel_MPIAIJ, 2691 /*129*/0, 2692 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2693 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2694 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2695 0, 2696 /*134*/0, 2697 0, 2698 0, 2699 0, 2700 0, 2701 /*139*/0, 2702 0, 2703 0, 2704 MatFDColoringSetUp_MPIXAIJ, 2705 0, 2706 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2707 }; 2708 2709 /* ----------------------------------------------------------------------------------------*/ 2710 2711 #undef __FUNCT__ 2712 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2713 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2714 { 2715 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2716 PetscErrorCode ierr; 2717 2718 PetscFunctionBegin; 2719 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2720 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2721 PetscFunctionReturn(0); 2722 } 2723 2724 #undef __FUNCT__ 2725 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2726 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2727 { 2728 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2729 PetscErrorCode ierr; 2730 2731 PetscFunctionBegin; 2732 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2733 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2734 PetscFunctionReturn(0); 2735 } 2736 2737 #undef __FUNCT__ 2738 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2739 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2740 { 2741 Mat_MPIAIJ *b; 2742 PetscErrorCode ierr; 2743 2744 PetscFunctionBegin; 2745 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2746 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2747 b = (Mat_MPIAIJ*)B->data; 2748 2749 if (!B->preallocated) { 2750 /* Explicitly create 2 MATSEQAIJ matrices. 
*/ 2751 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2752 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2753 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2754 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2755 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2756 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2757 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2758 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2759 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2760 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2761 } 2762 2763 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2764 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2765 B->preallocated = PETSC_TRUE; 2766 PetscFunctionReturn(0); 2767 } 2768 2769 #undef __FUNCT__ 2770 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2771 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2772 { 2773 Mat mat; 2774 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2775 PetscErrorCode ierr; 2776 2777 PetscFunctionBegin; 2778 *newmat = 0; 2779 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2780 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2781 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2782 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2783 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2784 a = (Mat_MPIAIJ*)mat->data; 2785 2786 mat->factortype = matin->factortype; 2787 mat->assembled = PETSC_TRUE; 2788 mat->insertmode = NOT_SET_VALUES; 2789 mat->preallocated = PETSC_TRUE; 2790 2791 a->size = oldmat->size; 2792 a->rank = oldmat->rank; 2793 a->donotstash = oldmat->donotstash; 2794 a->roworiented = oldmat->roworiented; 2795 a->rowindices = 0; 2796 a->rowvalues = 0; 2797 a->getrowactive = PETSC_FALSE; 2798 2799 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2800 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2801 2802 if (oldmat->colmap) { 2803 #if defined(PETSC_USE_CTABLE) 2804 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2805 #else 2806 ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr); 2807 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2808 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2809 #endif 2810 } else a->colmap = 0; 2811 if (oldmat->garray) { 2812 PetscInt len; 2813 len = oldmat->B->cmap->n; 2814 ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr); 2815 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2816 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2817 } else a->garray = 0; 2818 2819 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2820 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2821 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2822 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2823 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2824 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2825 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2826 ierr = 
PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2827 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2828 *newmat = mat; 2829 PetscFunctionReturn(0); 2830 } 2831 2832 2833 2834 #undef __FUNCT__ 2835 #define __FUNCT__ "MatLoad_MPIAIJ" 2836 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2837 { 2838 PetscScalar *vals,*svals; 2839 MPI_Comm comm; 2840 PetscErrorCode ierr; 2841 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2842 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 2843 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2844 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2845 PetscInt cend,cstart,n,*rowners,sizesset=1; 2846 int fd; 2847 PetscInt bs = 1; 2848 2849 PetscFunctionBegin; 2850 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2851 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2852 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2853 if (!rank) { 2854 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2855 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2856 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2857 } 2858 2859 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 2860 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2861 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2862 2863 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 2864 2865 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2866 M = header[1]; N = header[2]; 2867 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 2868 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 2869 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 2870 2871 /* If global sizes are set, check if they are consistent with that given in the file */ 2872 if (sizesset) { 2873 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 2874 } 2875 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 2876 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 2877 2878 /* determine ownership of all (block) rows */ 2879 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 2880 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 2881 else m = newMat->rmap->n; /* Set by user */ 2882 2883 ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr); 2884 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2885 2886 /* First process needs enough room for process with most rows */ 2887 if (!rank) { 2888 mmax = rowners[1]; 2889 for (i=2; i<=size; i++) { 2890 mmax = PetscMax(mmax, rowners[i]); 2891 } 2892 } else mmax = -1; /* unused, but compilers complain */ 2893 2894 rowners[0] = 0; 2895 for (i=2; i<=size; i++) { 2896 rowners[i] += rowners[i-1]; 2897 } 2898 rstart = rowners[rank]; 2899 rend = rowners[rank+1]; 2900 2901 /* distribute row lengths to all processors */ 2902 ierr = 
PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 2903 if (!rank) { 2904 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2905 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 2906 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 2907 for (j=0; j<m; j++) { 2908 procsnz[0] += ourlens[j]; 2909 } 2910 for (i=1; i<size; i++) { 2911 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2912 /* calculate the number of nonzeros on each processor */ 2913 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2914 procsnz[i] += rowlengths[j]; 2915 } 2916 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2917 } 2918 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2919 } else { 2920 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2921 } 2922 2923 if (!rank) { 2924 /* determine max buffer needed and allocate it */ 2925 maxnz = 0; 2926 for (i=0; i<size; i++) { 2927 maxnz = PetscMax(maxnz,procsnz[i]); 2928 } 2929 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2930 2931 /* read in my part of the matrix column indices */ 2932 nz = procsnz[0]; 2933 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2934 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2935 2936 /* read in every one elses and ship off */ 2937 for (i=1; i<size; i++) { 2938 nz = procsnz[i]; 2939 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2940 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2941 } 2942 ierr = PetscFree(cols);CHKERRQ(ierr); 2943 } else { 2944 /* determine buffer space needed for message */ 2945 nz = 0; 2946 for (i=0; i<m; i++) { 2947 nz += ourlens[i]; 2948 } 2949 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2950 2951 /* receive message of column indices*/ 2952 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2953 } 2954 2955 /* determine column ownership if matrix is not square */ 2956 if (N != M) { 2957 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 2958 else n = newMat->cmap->n; 2959 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2960 cstart = cend - n; 2961 } else { 2962 cstart = rstart; 2963 cend = rend; 2964 n = cend - cstart; 2965 } 2966 2967 /* loop over local rows, determining number of off diagonal entries */ 2968 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2969 jj = 0; 2970 for (i=0; i<m; i++) { 2971 for (j=0; j<ourlens[i]; j++) { 2972 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2973 jj++; 2974 } 2975 } 2976 2977 for (i=0; i<m; i++) { 2978 ourlens[i] -= offlens[i]; 2979 } 2980 if (!sizesset) { 2981 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 2982 } 2983 2984 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 2985 2986 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 2987 2988 for (i=0; i<m; i++) { 2989 ourlens[i] += offlens[i]; 2990 } 2991 2992 if (!rank) { 2993 ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr); 2994 2995 /* read in my part of the matrix numerical values */ 2996 nz = procsnz[0]; 2997 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2998 2999 /* insert into matrix */ 3000 jj = rstart; 3001 smycols = mycols; 3002 svals = vals; 3003 for (i=0; i<m; i++) { 3004 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3005 smycols += ourlens[i]; 3006 svals += ourlens[i]; 3007 jj++; 3008 } 3009 3010 /* read in other processors and ship out */ 3011 for (i=1; i<size; i++) { 3012 nz = 
procsnz[i]; 3013 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3014 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3015 } 3016 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3017 } else { 3018 /* receive numeric values */ 3019 ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr); 3020 3021 /* receive message of values*/ 3022 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3023 3024 /* insert into matrix */ 3025 jj = rstart; 3026 smycols = mycols; 3027 svals = vals; 3028 for (i=0; i<m; i++) { 3029 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3030 smycols += ourlens[i]; 3031 svals += ourlens[i]; 3032 jj++; 3033 } 3034 } 3035 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3036 ierr = PetscFree(vals);CHKERRQ(ierr); 3037 ierr = PetscFree(mycols);CHKERRQ(ierr); 3038 ierr = PetscFree(rowners);CHKERRQ(ierr); 3039 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3040 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3041 PetscFunctionReturn(0); 3042 } 3043 3044 #undef __FUNCT__ 3045 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3046 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3047 { 3048 PetscErrorCode ierr; 3049 IS iscol_local; 3050 PetscInt csize; 3051 3052 PetscFunctionBegin; 3053 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3054 if (call == MAT_REUSE_MATRIX) { 3055 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3056 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3057 } else { 3058 PetscInt cbs; 3059 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3060 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3061 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3062 } 3063 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3064 if (call == MAT_INITIAL_MATRIX) { 3065 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3066 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3067 } 3068 PetscFunctionReturn(0); 3069 } 3070 3071 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3072 #undef __FUNCT__ 3073 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3074 /* 3075 Not great since it makes two copies of the submatrix, first an SeqAIJ 3076 in local and then by concatenating the local matrices the end result. 3077 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3078 3079 Note: This requires a sequential iscol with all indices. 
3080 */ 3081 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3082 { 3083 PetscErrorCode ierr; 3084 PetscMPIInt rank,size; 3085 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3086 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3087 PetscBool allcolumns, colflag; 3088 Mat M,Mreuse; 3089 MatScalar *vwork,*aa; 3090 MPI_Comm comm; 3091 Mat_SeqAIJ *aij; 3092 3093 PetscFunctionBegin; 3094 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3095 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3096 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3097 3098 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3099 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3100 if (colflag && ncol == mat->cmap->N) { 3101 allcolumns = PETSC_TRUE; 3102 } else { 3103 allcolumns = PETSC_FALSE; 3104 } 3105 if (call == MAT_REUSE_MATRIX) { 3106 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3107 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3108 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3109 } else { 3110 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3111 } 3112 3113 /* 3114 m - number of local rows 3115 n - number of columns (same on all processors) 3116 rstart - first row in new global matrix generated 3117 */ 3118 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3119 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3120 if (call == MAT_INITIAL_MATRIX) { 3121 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3122 ii = aij->i; 3123 jj = aij->j; 3124 3125 /* 3126 Determine the number of non-zeros in the diagonal and off-diagonal 3127 portions of the matrix in order to do correct preallocation 3128 */ 3129 3130 /* first get start and end of "diagonal" columns */ 3131 if (csize == PETSC_DECIDE) { 3132 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3133 if (mglobal == n) { /* square matrix */ 3134 nlocal = m; 3135 } else { 3136 nlocal = n/size + ((n % size) > rank); 3137 } 3138 } else { 3139 nlocal = csize; 3140 } 3141 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3142 rstart = rend - nlocal; 3143 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3144 3145 /* next, compute all the lengths */ 3146 ierr = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr); 3147 olens = dlens + m; 3148 for (i=0; i<m; i++) { 3149 jend = ii[i+1] - ii[i]; 3150 olen = 0; 3151 dlen = 0; 3152 for (j=0; j<jend; j++) { 3153 if (*jj < rstart || *jj >= rend) olen++; 3154 else dlen++; 3155 jj++; 3156 } 3157 olens[i] = olen; 3158 dlens[i] = dlen; 3159 } 3160 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3161 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3162 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3163 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3164 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3165 ierr = PetscFree(dlens);CHKERRQ(ierr); 3166 } else { 3167 PetscInt ml,nl; 3168 3169 M = *newmat; 3170 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3171 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3172 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3173 /* 3174 The next 
two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3175 rather than the slower MatSetValues().
3176 */
3177 M->was_assembled = PETSC_TRUE;
3178 M->assembled = PETSC_FALSE;
3179 }
3180 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3181 aij = (Mat_SeqAIJ*)(Mreuse)->data;
3182 ii = aij->i;
3183 jj = aij->j;
3184 aa = aij->a;
3185 for (i=0; i<m; i++) {
3186 row = rstart + i;
3187 nz = ii[i+1] - ii[i];
3188 cwork = jj; jj += nz;
3189 vwork = aa; aa += nz;
3190 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3191 }
3192
3193 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3194 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3195 *newmat = M;
3196
3197 /* save submatrix used in processor for next request */
3198 if (call == MAT_INITIAL_MATRIX) {
3199 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3200 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3201 }
3202 PetscFunctionReturn(0);
3203 }
3204
3205 #undef __FUNCT__
3206 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3207 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3208 {
3209 PetscInt m,cstart, cend,j,nnz,i,d;
3210 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3211 const PetscInt *JJ;
3212 PetscScalar *values;
3213 PetscErrorCode ierr;
3214
3215 PetscFunctionBegin;
3216 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3217
3218 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3219 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3220 m = B->rmap->n;
3221 cstart = B->cmap->rstart;
3222 cend = B->cmap->rend;
3223 rstart = B->rmap->rstart;
3224
3225 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3226
3227 #if defined(PETSC_USE_DEBUG)
3228 for (i=0; i<m; i++) {
3229 nnz = Ii[i+1]- Ii[i];
3230 JJ = J + Ii[i];
3231 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3232 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3233 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3234 }
3235 #endif
3236
3237 for (i=0; i<m; i++) {
3238 nnz = Ii[i+1]- Ii[i];
3239 JJ = J + Ii[i];
3240 nnz_max = PetscMax(nnz_max,nnz);
3241 d = 0;
3242 for (j=0; j<nnz; j++) {
3243 if (cstart <= JJ[j] && JJ[j] < cend) d++;
3244 }
3245 d_nnz[i] = d;
3246 o_nnz[i] = nnz - d;
3247 }
3248 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3249 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3250
3251 if (v) values = (PetscScalar*)v;
3252 else {
3253 ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr);
3254 }
3255
3256 for (i=0; i<m; i++) {
3257 ii = i + rstart;
3258 nnz = Ii[i+1]- Ii[i];
3259 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3260 }
3261 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3262 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3263
3264 if (!v) {
3265 ierr = PetscFree(values);CHKERRQ(ierr);
3266 }
3267 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3268 PetscFunctionReturn(0);
3269 }
3270
3271 #undef __FUNCT__
3272 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3273 /*@
3274 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3275 (the default parallel PETSc format).
3276
3277 Collective on MPI_Comm
3278
3279 Input Parameters:
3280 + B - the matrix
3281 . i - the indices into j for the start of each local row (starts with zero)
3282 . j - the column indices for each local row (starts with zero)
3283 - v - optional values in the matrix
3284
3285 Level: developer
3286
3287 Notes:
3288 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3289 thus you CANNOT change the matrix entries by changing the values of v[] after you have
3290 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3291
3292 The i and j indices are 0 based, and the i indices are offsets into the local j array.
3293
3294 The format used for the sparse matrix input is equivalent to a
3295 row-major ordering, i.e., for the following matrix, the input data expected is
3296 as shown:
3297
3298 1 0 0
3299 2 0 3 P0
3300 -------
3301 4 5 6 P1
3302
3303 Process0 [P0]: rows_owned=[0,1]
3304 i = {0,1,3} [size = nrow+1 = 2+1]
3305 j = {0,0,2} [size = nz = 3]
3306 v = {1,2,3} [size = nz = 3]
3307
3308 Process1 [P1]: rows_owned=[2]
3309 i = {0,3} [size = nrow+1 = 1+1]
3310 j = {0,1,2} [size = nz = 3]
3311 v = {4,5,6} [size = nz = 3]
3312
3313 .keywords: matrix, aij, compressed row, sparse, parallel
3314
3315 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3316 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3317 @*/
3318 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3319 {
3320 PetscErrorCode ierr;
3321
3322 PetscFunctionBegin;
3323 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3324 PetscFunctionReturn(0);
3325 }
3326
3327 #undef __FUNCT__
3328 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3329 /*@C
3330 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3331 (the default parallel PETSc format). For good matrix assembly performance
3332 the user should preallocate the matrix storage by setting the parameters
3333 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
3334 performance can be increased by more than a factor of 50.
3335
3336 Collective on MPI_Comm
3337
3338 Input Parameters:
3339 + B - the matrix
3340 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
3341 (same value is used for all local rows)
3342 . d_nnz - array containing the number of nonzeros in the various rows of the
3343 DIAGONAL portion of the local submatrix (possibly different for each row)
3344 or NULL, if d_nz is used to specify the nonzero structure.
3345 The size of this array is equal to the number of local rows, i.e. 'm'.
3346 For matrices that will be factored, you must leave room for (and set)
3347 the diagonal entry even if it is zero.
3348 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
3349 submatrix (same value is used for all local rows).
3350 - o_nnz - array containing the number of nonzeros in the various rows of the
3351 OFF-DIAGONAL portion of the local submatrix (possibly different for
3352 each row) or NULL, if o_nz is used to specify the nonzero
3353 structure. The size of this array is equal to the number
3354 of local rows, i.e. 'm'.
3355
3356 If the *_nnz parameter is given then the *_nz parameter is ignored.
3357
3358 The AIJ format (also called the Yale sparse matrix format or
3359 compressed row storage (CSR)) is fully compatible with standard Fortran 77
3360 storage. The stored row and column indices begin with zero.
3361 See Users-Manual: ch_mat for details.
3362
3363 The parallel matrix is partitioned such that the first m0 rows belong to
3364 process 0, the next m1 rows belong to process 1, the next m2 rows belong
3365 to process 2 etc., where m0,m1,m2... are the input parameter 'm'.
3366
3367 The DIAGONAL portion of the local submatrix of a processor can be defined
3368 as the submatrix which is obtained by extracting the part corresponding to
3369 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3370 first row that belongs to the processor, r2 is the last row belonging to
3371 this processor, and c1-c2 is the range of indices of the local part of a
3372 vector suitable for applying the matrix to. This is an mxn matrix. In the
3373 common case of a square matrix, the row and column ranges are the same and
3374 the DIAGONAL part is also square. The remaining portion of the local
3375 submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3376
3377 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
3378
3379 You can call MatGetInfo() to get information on how effective the preallocation was;
3380 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3381 You can also run with the option -info and look for messages with the string
3382 malloc in them to see if additional memory allocation was needed.
3383
3384 Example usage:
3385
3386 Consider the following 8x8 matrix with 34 non-zero values, that is
3387 assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3388 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3389 as follows:
3390
3391 .vb
3392 1 2 0 | 0 3 0 | 0 4
3393 Proc0 0 5 6 | 7 0 0 | 8 0
3394 9 0 10 | 11 0 0 | 12 0
3395 -------------------------------------
3396 13 0 14 | 15 16 17 | 0 0
3397 Proc1 0 18 0 | 19 20 21 | 0 0
3398 0 0 0 | 22 23 0 | 24 0
3399 -------------------------------------
3400 Proc2 25 26 27 | 0 0 28 | 29 0
3401 30 0 0 | 31 32 33 | 0 34
3402 .ve
3403
3404 This can be represented as a collection of submatrices as:
3405
3406 .vb
3407 A B C
3408 D E F
3409 G H I
3410 .ve
3411
3412 Here the submatrices A,B,C are owned by proc0, D,E,F are
3413 owned by proc1, G,H,I are owned by proc2.
3414
3415 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3416 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3417 The 'M','N' parameters are 8,8, and have the same values on all procs.
3418
3419 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3420 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3421 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3422 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3423 part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ
3424 matrix and [DF] as another SeqAIJ matrix.
3425
3426 When d_nz, o_nz parameters are specified, d_nz storage elements are
3427 allocated for every row of the local diagonal submatrix, and o_nz
3428 storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3429 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3430 local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3431 In this case, the values of d_nz,o_nz are:
3432 .vb
3433 proc0 : d_nz = 2, o_nz = 2
3434 proc1 : d_nz = 3, o_nz = 2
3435 proc2 : d_nz = 1, o_nz = 4
3436 .ve
3437 We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3438 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3439 for proc2, i.e. we are using 12+15+10=37 storage locations to store
3440 34 values.
3441
3442 When d_nnz, o_nnz parameters are specified, the storage is specified
3443 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3444 In the above case the values for d_nnz,o_nnz are:
3445 .vb
3446 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3447 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3448 proc2: d_nnz = [1,1] and o_nnz = [4,4]
3449 .ve
3450 Here the space allocated is the sum of all the above values, i.e. 34, and
3451 hence pre-allocation is perfect.
3452
3453 Level: intermediate
3454
3455 .keywords: matrix, aij, compressed row, sparse, parallel
3456
3457 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3458 MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3459 @*/
3460 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3461 {
3462 PetscErrorCode ierr;
3463
3464 PetscFunctionBegin;
3465 PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3466 PetscValidType(B,1);
3467 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3468 PetscFunctionReturn(0);
3469 }
3470
3471 #undef __FUNCT__
3472 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3473 /*@
3474 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local
3475 rows in standard CSR format.
3476
3477 Collective on MPI_Comm
3478
3479 Input Parameters:
3480 + comm - MPI communicator
3481 . m - number of local rows (Cannot be PETSC_DECIDE)
3482 . n - This value should be the same as the local size used in creating the
3483 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3484 calculated if N is given) For square matrices n is almost always m.
3485 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3486 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3487 . i - row indices
3488 . j - column indices
3489 - a - matrix values
3490
3491 Output Parameter:
3492 . mat - the matrix
3493
3494 Level: intermediate
3495
3496 Notes:
3497 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3498 thus you CANNOT change the matrix entries by changing the values of a[] after you have
3499 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3500
3501 The i and j indices are 0 based, and the i indices are offsets into the local j array.
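
       As a minimal illustrative call sketch (not part of the original manual text; a hypothetical
       fragment with error handling and cleanup omitted), process 0 of the two-process example shown
       below would create the matrix with:
.vb
       Mat            A;
       PetscErrorCode ierr;
       PetscInt       i[] = {0,1,3};       /* row offsets for the 2 locally owned rows */
       PetscInt       j[] = {0,0,2};       /* 0-based global column indices            */
       PetscScalar    v[] = {1,2,3};       /* one value per entry of j                 */
       ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);CHKERRQ(ierr);
.ve
       Process 1 would make the same call with its own local row (m = 1) and the arrays listed below.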
3502
3503 The format used for the sparse matrix input is equivalent to a
3504 row-major ordering, i.e., for the following matrix, the input data expected is
3505 as shown:
3506
3507 1 0 0
3508 2 0 3 P0
3509 -------
3510 4 5 6 P1
3511
3512 Process0 [P0]: rows_owned=[0,1]
3513 i = {0,1,3} [size = nrow+1 = 2+1]
3514 j = {0,0,2} [size = nz = 3]
3515 v = {1,2,3} [size = nz = 3]
3516
3517 Process1 [P1]: rows_owned=[2]
3518 i = {0,3} [size = nrow+1 = 1+1]
3519 j = {0,1,2} [size = nz = 3]
3520 v = {4,5,6} [size = nz = 3]
3521
3522 .keywords: matrix, aij, compressed row, sparse, parallel
3523
3524 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3525 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3526 @*/
3527 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3528 {
3529 PetscErrorCode ierr;
3530
3531 PetscFunctionBegin;
3532 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3533 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3534 ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3535 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3536 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
3537 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3538 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3539 PetscFunctionReturn(0);
3540 }
3541
3542 #undef __FUNCT__
3543 #define __FUNCT__ "MatCreateAIJ"
3544 /*@C
3545 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
3546 (the default parallel PETSc format). For good matrix assembly performance
3547 the user should preallocate the matrix storage by setting the parameters
3548 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
3549 performance can be increased by more than a factor of 50.
3550
3551 Collective on MPI_Comm
3552
3553 Input Parameters:
3554 + comm - MPI communicator
3555 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3556 This value should be the same as the local size used in creating the
3557 y vector for the matrix-vector product y = Ax.
3558 . n - This value should be the same as the local size used in creating the
3559 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3560 calculated if N is given) For square matrices n is almost always m.
3561 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3562 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3563 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
3564 (same value is used for all local rows)
3565 . d_nnz - array containing the number of nonzeros in the various rows of the
3566 DIAGONAL portion of the local submatrix (possibly different for each row)
3567 or NULL, if d_nz is used to specify the nonzero structure.
3568 The size of this array is equal to the number of local rows, i.e. 'm'.
3569 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
3570 submatrix (same value is used for all local rows).
3571 - o_nnz - array containing the number of nonzeros in the various rows of the
3572 OFF-DIAGONAL portion of the local submatrix (possibly different for
3573 each row) or NULL, if o_nz is used to specify the nonzero
3574 structure.
The size of this array is equal to the number
3575 of local rows, i.e. 'm'.
3576
3577 Output Parameter:
3578 . A - the matrix
3579
3580 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3581 MatXXXXSetPreallocation() paradigm instead of this routine directly.
3582 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3583
3584 Notes:
3585 If the *_nnz parameter is given then the *_nz parameter is ignored.
3586
3587 m,n,M,N parameters specify the size of the matrix, and its partitioning across
3588 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3589 storage requirements for this matrix.
3590
3591 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3592 processor then it must be used on all processors that share the object for
3593 that argument.
3594
3595 The user MUST specify either the local or global matrix dimensions
3596 (possibly both).
3597
3598 The parallel matrix is partitioned across processors such that the
3599 first m0 rows belong to process 0, the next m1 rows belong to
3600 process 1, the next m2 rows belong to process 2 etc., where
3601 m0,m1,m2... are the input parameter 'm', i.e. each processor stores
3602 values corresponding to an [m x N] submatrix.
3603
3604 The columns are logically partitioned with the n0 columns belonging
3605 to the 0th partition, the next n1 columns belonging to the next
3606 partition etc., where n0,n1,n2... are the input parameter 'n'.
3607
3608 The DIAGONAL portion of the local submatrix on any given processor
3609 is the submatrix corresponding to the rows and columns m,n
3610 corresponding to the given processor, i.e. the diagonal matrix on
3611 process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1]
3612 etc. The remaining portion of the local submatrix [m x (N-n)]
3613 constitutes the OFF-DIAGONAL portion. The example below better
3614 illustrates this concept.
3615
3616 For a square global matrix we define each processor's diagonal portion
3617 to be its local rows and the corresponding columns (a square submatrix);
3618 each processor's off-diagonal portion encompasses the remainder of the
3619 local matrix (a rectangular submatrix).
3620
3621 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
3622
3623 When calling this routine with a single process communicator, a matrix of
3624 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
3625 type of communicator, use the construction mechanism:
3626 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3627
3628 By default, this format uses inodes (identical nodes) when possible.
3629 We search for consecutive rows with the same nonzero structure, thereby
3630 reusing matrix information to achieve increased efficiency.
3631
3632 Options Database Keys:
3633 + -mat_no_inode - Do not use inodes
3634 . -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3635 - -mat_aij_oneindex - Internally use indexing starting at 1
3636 rather than 0. Note that when calling MatSetValues(),
3637 the user still MUST index entries starting at 0!
3638
3639
3640 Example usage:
3641
3642 Consider the following 8x8 matrix with 34 non-zero values, that is
3643 assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3644 proc1 owns 3 rows, proc2 owns 2 rows.
This division can be shown
3645 as follows:
3646
3647 .vb
3648 1 2 0 | 0 3 0 | 0 4
3649 Proc0 0 5 6 | 7 0 0 | 8 0
3650 9 0 10 | 11 0 0 | 12 0
3651 -------------------------------------
3652 13 0 14 | 15 16 17 | 0 0
3653 Proc1 0 18 0 | 19 20 21 | 0 0
3654 0 0 0 | 22 23 0 | 24 0
3655 -------------------------------------
3656 Proc2 25 26 27 | 0 0 28 | 29 0
3657 30 0 0 | 31 32 33 | 0 34
3658 .ve
3659
3660 This can be represented as a collection of submatrices as:
3661
3662 .vb
3663 A B C
3664 D E F
3665 G H I
3666 .ve
3667
3668 Here the submatrices A,B,C are owned by proc0, D,E,F are
3669 owned by proc1, G,H,I are owned by proc2.
3670
3671 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3672 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3673 The 'M','N' parameters are 8,8, and have the same values on all procs.
3674
3675 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3676 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3677 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3678 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3679 part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ
3680 matrix and [DF] as another SeqAIJ matrix.
3681
3682 When d_nz, o_nz parameters are specified, d_nz storage elements are
3683 allocated for every row of the local diagonal submatrix, and o_nz
3684 storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3685 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3686 local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3687 In this case, the values of d_nz,o_nz are:
3688 .vb
3689 proc0 : d_nz = 2, o_nz = 2
3690 proc1 : d_nz = 3, o_nz = 2
3691 proc2 : d_nz = 1, o_nz = 4
3692 .ve
3693 We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3694 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3695 for proc2, i.e. we are using 12+15+10=37 storage locations to store
3696 34 values.
3697
3698 When d_nnz, o_nnz parameters are specified, the storage is specified
3699 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3700 In the above case the values for d_nnz,o_nnz are:
3701 .vb
3702 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3703 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3704 proc2: d_nnz = [1,1] and o_nnz = [4,4]
3705 .ve
3706 Here the space allocated is the sum of all the above values, i.e. 34, and
3707 hence pre-allocation is perfect.
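
       As an illustrative call for the example above (a hypothetical fragment, not part of the
       original manual text; the MatSetValues() and assembly calls for the locally owned rows,
       as well as error handling, are omitted), proc0 could create the 8x8 matrix with its
       per-row counts as:
.vb
       Mat            A;
       PetscErrorCode ierr;
       PetscInt       d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};   /* proc0 values; proc1 and proc2 pass theirs */
       ierr = MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
       Because d_nnz and o_nnz are supplied, the d_nz and o_nz arguments (0 here) are ignored.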
3708 3709 Level: intermediate 3710 3711 .keywords: matrix, aij, compressed row, sparse, parallel 3712 3713 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3714 MPIAIJ, MatCreateMPIAIJWithArrays() 3715 @*/ 3716 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3717 { 3718 PetscErrorCode ierr; 3719 PetscMPIInt size; 3720 3721 PetscFunctionBegin; 3722 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3723 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3724 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3725 if (size > 1) { 3726 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3727 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3728 } else { 3729 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3730 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3731 } 3732 PetscFunctionReturn(0); 3733 } 3734 3735 #undef __FUNCT__ 3736 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3737 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 3738 { 3739 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3740 3741 PetscFunctionBegin; 3742 if (Ad) *Ad = a->A; 3743 if (Ao) *Ao = a->B; 3744 if (colmap) *colmap = a->garray; 3745 PetscFunctionReturn(0); 3746 } 3747 3748 #undef __FUNCT__ 3749 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3750 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3751 { 3752 PetscErrorCode ierr; 3753 PetscInt i; 3754 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3755 3756 PetscFunctionBegin; 3757 if (coloring->ctype == IS_COLORING_GLOBAL) { 3758 ISColoringValue *allcolors,*colors; 3759 ISColoring ocoloring; 3760 3761 /* set coloring for diagonal portion */ 3762 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3763 3764 /* set coloring for off-diagonal portion */ 3765 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 3766 ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr); 3767 for (i=0; i<a->B->cmap->n; i++) { 3768 colors[i] = allcolors[a->garray[i]]; 3769 } 3770 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3771 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3772 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3773 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 3774 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3775 ISColoringValue *colors; 3776 PetscInt *larray; 3777 ISColoring ocoloring; 3778 3779 /* set coloring for diagonal portion */ 3780 ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr); 3781 for (i=0; i<a->A->cmap->n; i++) { 3782 larray[i] = i + A->cmap->rstart; 3783 } 3784 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 3785 ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr); 3786 for (i=0; i<a->A->cmap->n; i++) { 3787 colors[i] = coloring->colors[larray[i]]; 3788 } 3789 ierr = PetscFree(larray);CHKERRQ(ierr); 3790 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3791 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3792 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 3793 3794 /* set coloring for off-diagonal portion */ 3795 ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr); 3796 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 3797 ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr); 3798 for (i=0; i<a->B->cmap->n; i++) { 3799 colors[i] = coloring->colors[larray[i]]; 3800 } 3801 ierr = PetscFree(larray);CHKERRQ(ierr); 3802 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3803 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3804 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 3805 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 3806 PetscFunctionReturn(0); 3807 } 3808 3809 #undef __FUNCT__ 3810 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3811 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3812 { 3813 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3814 PetscErrorCode ierr; 3815 3816 PetscFunctionBegin; 3817 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3818 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3819 PetscFunctionReturn(0); 3820 } 3821 3822 #undef __FUNCT__ 3823 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIAIJ" 3824 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3825 { 3826 PetscErrorCode ierr; 3827 PetscInt m,N,i,rstart,nnz,Ii; 3828 PetscInt *indx; 3829 PetscScalar *values; 3830 3831 PetscFunctionBegin; 3832 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3833 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 3834 PetscInt *dnz,*onz,sum,bs,cbs; 3835 3836 if (n == PETSC_DECIDE) { 3837 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3838 } 3839 /* Check sum(n) = N */ 3840 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3841 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 3842 3843 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3844 rstart -= m; 3845 3846 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3847 for (i=0; i<m; i++) { 3848 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 3849 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3850 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 3851 } 3852 3853 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3854 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3855 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3856 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 3857 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3858 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3859 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3860 } 3861 3862 /* numeric phase */ 3863 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 3864 for (i=0; i<m; i++) { 3865 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3866 Ii = i + rstart; 3867 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3868 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3869 } 3870 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3871 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3872 PetscFunctionReturn(0); 3873 } 3874 3875 #undef __FUNCT__ 3876 #define __FUNCT__ "MatFileSplit" 3877 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3878 { 3879 PetscErrorCode ierr; 3880 
PetscMPIInt rank; 3881 PetscInt m,N,i,rstart,nnz; 3882 size_t len; 3883 const PetscInt *indx; 3884 PetscViewer out; 3885 char *name; 3886 Mat B; 3887 const PetscScalar *values; 3888 3889 PetscFunctionBegin; 3890 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3891 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3892 /* Should this be the type of the diagonal block of A? */ 3893 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3894 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3895 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 3896 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3897 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 3898 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3899 for (i=0; i<m; i++) { 3900 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3901 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3902 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3903 } 3904 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3905 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3906 3907 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 3908 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3909 ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr); 3910 sprintf(name,"%s.%d",outfile,rank); 3911 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3912 ierr = PetscFree(name);CHKERRQ(ierr); 3913 ierr = MatView(B,out);CHKERRQ(ierr); 3914 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 3915 ierr = MatDestroy(&B);CHKERRQ(ierr); 3916 PetscFunctionReturn(0); 3917 } 3918 3919 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 3920 #undef __FUNCT__ 3921 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3922 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3923 { 3924 PetscErrorCode ierr; 3925 Mat_Merge_SeqsToMPI *merge; 3926 PetscContainer container; 3927 3928 PetscFunctionBegin; 3929 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 3930 if (container) { 3931 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 3932 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3933 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3934 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3935 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3936 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3937 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 3938 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3939 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 3940 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3941 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 3942 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 3943 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 3944 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 3945 ierr = PetscFree(merge);CHKERRQ(ierr); 3946 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 3947 } 3948 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 3949 PetscFunctionReturn(0); 3950 } 3951 3952 #include <../src/mat/utils/freespace.h> 3953 #include <petscbt.h> 3954 3955 #undef __FUNCT__ 3956 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 3957 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 3958 { 3959 PetscErrorCode ierr; 3960 MPI_Comm comm; 3961 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 3962 PetscMPIInt size,rank,taga,*len_s; 3963 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 3964 PetscInt proc,m; 3965 PetscInt **buf_ri,**buf_rj; 3966 PetscInt 
k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 3967 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 3968 MPI_Request *s_waits,*r_waits; 3969 MPI_Status *status; 3970 MatScalar *aa=a->a; 3971 MatScalar **abuf_r,*ba_i; 3972 Mat_Merge_SeqsToMPI *merge; 3973 PetscContainer container; 3974 3975 PetscFunctionBegin; 3976 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 3977 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3978 3979 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3980 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3981 3982 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 3983 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 3984 3985 bi = merge->bi; 3986 bj = merge->bj; 3987 buf_ri = merge->buf_ri; 3988 buf_rj = merge->buf_rj; 3989 3990 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 3991 owners = merge->rowmap->range; 3992 len_s = merge->len_s; 3993 3994 /* send and recv matrix values */ 3995 /*-----------------------------*/ 3996 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 3997 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 3998 3999 ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr); 4000 for (proc=0,k=0; proc<size; proc++) { 4001 if (!len_s[proc]) continue; 4002 i = owners[proc]; 4003 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4004 k++; 4005 } 4006 4007 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4008 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4009 ierr = PetscFree(status);CHKERRQ(ierr); 4010 4011 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4012 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4013 4014 /* insert mat values of mpimat */ 4015 /*----------------------------*/ 4016 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4017 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4018 4019 for (k=0; k<merge->nrecv; k++) { 4020 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4021 nrows = *(buf_ri_k[k]); 4022 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4023 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4024 } 4025 4026 /* set values of ba */ 4027 m = merge->rowmap->n; 4028 for (i=0; i<m; i++) { 4029 arow = owners[rank] + i; 4030 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4031 bnzi = bi[i+1] - bi[i]; 4032 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4033 4034 /* add local non-zero vals of this proc's seqmat into ba */ 4035 anzi = ai[arow+1] - ai[arow]; 4036 aj = a->j + ai[arow]; 4037 aa = a->a + ai[arow]; 4038 nextaj = 0; 4039 for (j=0; nextaj<anzi; j++) { 4040 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4041 ba_i[j] += aa[nextaj++]; 4042 } 4043 } 4044 4045 /* add received vals into ba */ 4046 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4047 /* i-th row */ 4048 if (i == *nextrow[k]) { 4049 anzi = *(nextai[k]+1) - *nextai[k]; 4050 aj = buf_rj[k] + *(nextai[k]); 4051 aa = abuf_r[k] + *(nextai[k]); 4052 nextaj = 0; 4053 for (j=0; nextaj<anzi; j++) { 4054 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4055 ba_i[j] += aa[nextaj++]; 4056 } 4057 } 4058 nextrow[k]++; nextai[k]++; 4059 } 4060 } 4061 ierr = 
MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4062 } 4063 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4064 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4065 4066 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4067 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4068 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4069 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4070 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4071 PetscFunctionReturn(0); 4072 } 4073 4074 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4075 4076 #undef __FUNCT__ 4077 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4078 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4079 { 4080 PetscErrorCode ierr; 4081 Mat B_mpi; 4082 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4083 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4084 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4085 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4086 PetscInt len,proc,*dnz,*onz,bs,cbs; 4087 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4088 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4089 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4090 MPI_Status *status; 4091 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4092 PetscBT lnkbt; 4093 Mat_Merge_SeqsToMPI *merge; 4094 PetscContainer container; 4095 4096 PetscFunctionBegin; 4097 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4098 4099 /* make sure it is a PETSc comm */ 4100 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4101 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4102 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4103 4104 ierr = PetscNew(&merge);CHKERRQ(ierr); 4105 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4106 4107 /* determine row ownership */ 4108 /*---------------------------------------------------------*/ 4109 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4110 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4111 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4112 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4113 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4114 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4115 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4116 4117 m = merge->rowmap->n; 4118 owners = merge->rowmap->range; 4119 4120 /* determine the number of messages to send, their lengths */ 4121 /*---------------------------------------------------------*/ 4122 len_s = merge->len_s; 4123 4124 len = 0; /* length of buf_si[] */ 4125 merge->nsend = 0; 4126 for (proc=0; proc<size; proc++) { 4127 len_si[proc] = 0; 4128 if (proc == rank) { 4129 len_s[proc] = 0; 4130 } else { 4131 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4132 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4133 } 4134 if (len_s[proc]) { 4135 merge->nsend++; 4136 nrows = 0; 4137 for (i=owners[proc]; i<owners[proc+1]; i++) { 4138 if (ai[i+1] > ai[i]) nrows++; 4139 } 4140 len_si[proc] = 2*(nrows+1); 4141 len += len_si[proc]; 4142 } 4143 } 4144 4145 /* determine the number and length of messages to receive for ij-structure */ 4146 /*-------------------------------------------------------------------------*/ 4147 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4148 ierr = 
PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4149 4150 /* post the Irecv of j-structure */ 4151 /*-------------------------------*/ 4152 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4153 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4154 4155 /* post the Isend of j-structure */ 4156 /*--------------------------------*/ 4157 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4158 4159 for (proc=0, k=0; proc<size; proc++) { 4160 if (!len_s[proc]) continue; 4161 i = owners[proc]; 4162 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4163 k++; 4164 } 4165 4166 /* receives and sends of j-structure are complete */ 4167 /*------------------------------------------------*/ 4168 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4169 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4170 4171 /* send and recv i-structure */ 4172 /*---------------------------*/ 4173 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4174 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4175 4176 ierr = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr); 4177 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4178 for (proc=0,k=0; proc<size; proc++) { 4179 if (!len_s[proc]) continue; 4180 /* form outgoing message for i-structure: 4181 buf_si[0]: nrows to be sent 4182 [1:nrows]: row index (global) 4183 [nrows+1:2*nrows+1]: i-structure index 4184 */ 4185 /*-------------------------------------------*/ 4186 nrows = len_si[proc]/2 - 1; 4187 buf_si_i = buf_si + nrows+1; 4188 buf_si[0] = nrows; 4189 buf_si_i[0] = 0; 4190 nrows = 0; 4191 for (i=owners[proc]; i<owners[proc+1]; i++) { 4192 anzi = ai[i+1] - ai[i]; 4193 if (anzi) { 4194 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4195 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4196 nrows++; 4197 } 4198 } 4199 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4200 k++; 4201 buf_si += len_si[proc]; 4202 } 4203 4204 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4205 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4206 4207 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4208 for (i=0; i<merge->nrecv; i++) { 4209 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4210 } 4211 4212 ierr = PetscFree(len_si);CHKERRQ(ierr); 4213 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4214 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4215 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4216 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4217 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4218 ierr = PetscFree(status);CHKERRQ(ierr); 4219 4220 /* compute a local seq matrix in each processor */ 4221 /*----------------------------------------------*/ 4222 /* allocate bi array and free space for accumulating nonzero column info */ 4223 ierr = PetscMalloc1((m+1),&bi);CHKERRQ(ierr); 4224 bi[0] = 0; 4225 4226 /* create and initialize a linked list */ 4227 nlnk = N+1; 4228 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4229 4230 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4231 len = ai[owners[rank+1]] - 
ai[owners[rank]]; 4232 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4233 4234 current_space = free_space; 4235 4236 /* determine symbolic info for each local row */ 4237 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4238 4239 for (k=0; k<merge->nrecv; k++) { 4240 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4241 nrows = *buf_ri_k[k]; 4242 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4243 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4244 } 4245 4246 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4247 len = 0; 4248 for (i=0; i<m; i++) { 4249 bnzi = 0; 4250 /* add local non-zero cols of this proc's seqmat into lnk */ 4251 arow = owners[rank] + i; 4252 anzi = ai[arow+1] - ai[arow]; 4253 aj = a->j + ai[arow]; 4254 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4255 bnzi += nlnk; 4256 /* add received col data into lnk */ 4257 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4258 if (i == *nextrow[k]) { /* i-th row */ 4259 anzi = *(nextai[k]+1) - *nextai[k]; 4260 aj = buf_rj[k] + *nextai[k]; 4261 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4262 bnzi += nlnk; 4263 nextrow[k]++; nextai[k]++; 4264 } 4265 } 4266 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4267 4268 /* if free space is not available, make more free space */ 4269 if (current_space->local_remaining<bnzi) { 4270 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4271 nspacedouble++; 4272 } 4273 /* copy data into free space, then initialize lnk */ 4274 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4275 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4276 4277 current_space->array += bnzi; 4278 current_space->local_used += bnzi; 4279 current_space->local_remaining -= bnzi; 4280 4281 bi[i+1] = bi[i] + bnzi; 4282 } 4283 4284 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4285 4286 ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr); 4287 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4288 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4289 4290 /* create symbolic parallel matrix B_mpi */ 4291 /*---------------------------------------*/ 4292 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4293 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4294 if (n==PETSC_DECIDE) { 4295 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4296 } else { 4297 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4298 } 4299 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4300 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4301 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4302 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4303 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4304 4305 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4306 B_mpi->assembled = PETSC_FALSE; 4307 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4308 merge->bi = bi; 4309 merge->bj = bj; 4310 merge->buf_ri = buf_ri; 4311 merge->buf_rj = buf_rj; 4312 merge->coi = NULL; 4313 merge->coj = NULL; 4314 merge->owners_co = NULL; 4315 4316 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4317 4318 /* attach the supporting struct to B_mpi for 
reuse */
4319 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4320 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4321 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4322 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4323 *mpimat = B_mpi;
4324
4325 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4326 PetscFunctionReturn(0);
4327 }
4328
4329 #undef __FUNCT__
4330 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4331 /*@C
4332 MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding the sequential
4333 matrices from each processor
4334
4335 Collective on MPI_Comm
4336
4337 Input Parameters:
4338 + comm - the communicator the parallel matrix will live on
4339 . seqmat - the input sequential matrix (one per process)
4340 . m - number of local rows (or PETSC_DECIDE)
4341 . n - number of local columns (or PETSC_DECIDE)
4342 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4343
4344 Output Parameter:
4345 . mpimat - the parallel matrix generated
4346
4347 Level: advanced
4348
4349 Notes:
4350 The dimensions of the sequential matrix in each processor MUST be the same.
4351 The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4352 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4353 @*/
4354 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4355 {
4356 PetscErrorCode ierr;
4357 PetscMPIInt size;
4358
4359 PetscFunctionBegin;
4360 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4361 if (size == 1) {
4362 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4363 if (scall == MAT_INITIAL_MATRIX) {
4364 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4365 } else {
4366 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4367 }
4368 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4369 PetscFunctionReturn(0);
4370 }
4371 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4372 if (scall == MAT_INITIAL_MATRIX) {
4373 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4374 }
4375 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4376 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4377 PetscFunctionReturn(0);
4378 }
4379
4380 #undef __FUNCT__
4381 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4382 /*@
4383 MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4384 mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4385 with MatGetSize()
4386
4387 Not Collective
4388
4389 Input Parameters:
4390 + A - the matrix
4391 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4392
4393 Output Parameter:
4394 .
A_loc - the local sequential matrix generated 4395 4396 Level: developer 4397 4398 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4399 4400 @*/ 4401 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4402 { 4403 PetscErrorCode ierr; 4404 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4405 Mat_SeqAIJ *mat,*a,*b; 4406 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4407 MatScalar *aa,*ba,*cam; 4408 PetscScalar *ca; 4409 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4410 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4411 PetscBool match; 4412 MPI_Comm comm; 4413 PetscMPIInt size; 4414 4415 PetscFunctionBegin; 4416 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4417 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4418 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4419 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4420 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 4421 4422 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4423 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4424 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4425 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4426 aa = a->a; ba = b->a; 4427 if (scall == MAT_INITIAL_MATRIX) { 4428 if (size == 1) { 4429 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 4430 PetscFunctionReturn(0); 4431 } 4432 4433 ierr = PetscMalloc1((1+am),&ci);CHKERRQ(ierr); 4434 ci[0] = 0; 4435 for (i=0; i<am; i++) { 4436 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4437 } 4438 ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr); 4439 ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr); 4440 k = 0; 4441 for (i=0; i<am; i++) { 4442 ncols_o = bi[i+1] - bi[i]; 4443 ncols_d = ai[i+1] - ai[i]; 4444 /* off-diagonal portion of A */ 4445 for (jo=0; jo<ncols_o; jo++) { 4446 col = cmap[*bj]; 4447 if (col >= cstart) break; 4448 cj[k] = col; bj++; 4449 ca[k++] = *ba++; 4450 } 4451 /* diagonal portion of A */ 4452 for (j=0; j<ncols_d; j++) { 4453 cj[k] = cstart + *aj++; 4454 ca[k++] = *aa++; 4455 } 4456 /* off-diagonal portion of A */ 4457 for (j=jo; j<ncols_o; j++) { 4458 cj[k] = cmap[*bj++]; 4459 ca[k++] = *ba++; 4460 } 4461 } 4462 /* put together the new matrix */ 4463 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4464 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4465 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 4466 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4467 mat->free_a = PETSC_TRUE; 4468 mat->free_ij = PETSC_TRUE; 4469 mat->nonew = 0; 4470 } else if (scall == MAT_REUSE_MATRIX) { 4471 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4472 ci = mat->i; cj = mat->j; cam = mat->a; 4473 for (i=0; i<am; i++) { 4474 /* off-diagonal portion of A */ 4475 ncols_o = bi[i+1] - bi[i]; 4476 for (jo=0; jo<ncols_o; jo++) { 4477 col = cmap[*bj]; 4478 if (col >= cstart) break; 4479 *cam++ = *ba++; bj++; 4480 } 4481 /* diagonal portion of A */ 4482 ncols_d = ai[i+1] - ai[i]; 4483 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4484 /* off-diagonal portion of A */ 4485 for (j=jo; j<ncols_o; j++) { 4486 *cam++ = *ba++; bj++; 4487 } 4488 } 4489 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4490 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4491 PetscFunctionReturn(0); 4492 } 4493 4494 #undef __FUNCT__ 4495 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 4496 /*@C 4497 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 4498 4499 Not Collective 4500 4501 Input Parameters: 4502 + A - the matrix 4503 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4504 - row, col - index sets of rows and columns to extract (or NULL) 4505 4506 Output Parameter: 4507 . A_loc - the local sequential matrix generated 4508 4509 Level: developer 4510 4511 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 4512 4513 @*/ 4514 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4515 { 4516 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4517 PetscErrorCode ierr; 4518 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4519 IS isrowa,iscola; 4520 Mat *aloc; 4521 PetscBool match; 4522 4523 PetscFunctionBegin; 4524 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4525 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4526 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4527 if (!row) { 4528 start = A->rmap->rstart; end = A->rmap->rend; 4529 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4530 } else { 4531 isrowa = *row; 4532 } 4533 if (!col) { 4534 start = A->cmap->rstart; 4535 cmap = a->garray; 4536 nzA = a->A->cmap->n; 4537 nzB = a->B->cmap->n; 4538 ierr = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr); 4539 ncols = 0; 4540 for (i=0; i<nzB; i++) { 4541 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4542 else break; 4543 } 4544 imark = i; 4545 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4546 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4547 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 4548 } else { 4549 iscola = *col; 4550 } 4551 if (scall != MAT_INITIAL_MATRIX) { 4552 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 4553 aloc[0] = *A_loc; 4554 } 4555 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4556 *A_loc = aloc[0]; 4557 ierr = PetscFree(aloc);CHKERRQ(ierr); 4558 if (!row) { 4559 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 4560 } 4561 if (!col) { 4562 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 4563 } 4564 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4565 PetscFunctionReturn(0); 4566 } 4567 4568 #undef __FUNCT__ 4569 #define __FUNCT__ "MatGetBrowsOfAcols" 4570 /*@C 4571 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to 
nonzero columns of local A 4572 4573 Collective on Mat 4574 4575 Input Parameters: 4576 + A,B - the matrices in mpiaij format 4577 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4578 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 4579 4580 Output Parameter: 4581 + rowb, colb - index sets of rows and columns of B to extract 4582 - B_seq - the sequential matrix generated 4583 4584 Level: developer 4585 4586 @*/ 4587 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 4588 { 4589 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4590 PetscErrorCode ierr; 4591 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4592 IS isrowb,iscolb; 4593 Mat *bseq=NULL; 4594 4595 PetscFunctionBegin; 4596 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 4597 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4598 } 4599 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4600 4601 if (scall == MAT_INITIAL_MATRIX) { 4602 start = A->cmap->rstart; 4603 cmap = a->garray; 4604 nzA = a->A->cmap->n; 4605 nzB = a->B->cmap->n; 4606 ierr = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr); 4607 ncols = 0; 4608 for (i=0; i<nzB; i++) { /* row < local row index */ 4609 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4610 else break; 4611 } 4612 imark = i; 4613 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4614 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4615 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 4616 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 4617 } else { 4618 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4619 isrowb = *rowb; iscolb = *colb; 4620 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 4621 bseq[0] = *B_seq; 4622 } 4623 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4624 *B_seq = bseq[0]; 4625 ierr = PetscFree(bseq);CHKERRQ(ierr); 4626 if (!rowb) { 4627 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 4628 } else { 4629 *rowb = isrowb; 4630 } 4631 if (!colb) { 4632 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 4633 } else { 4634 *colb = iscolb; 4635 } 4636 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4637 PetscFunctionReturn(0); 4638 } 4639 4640 #undef __FUNCT__ 4641 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 4642 /* 4643 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 4644 of the OFF-DIAGONAL portion of local A 4645 4646 Collective on Mat 4647 4648 Input Parameters: 4649 + A,B - the matrices in mpiaij format 4650 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4651 4652 Output Parameter: 4653 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 4654 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 4655 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 4656 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 4657 4658 Level: developer 4659 4660 */ 4661 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 4662 { 4663 VecScatter_MPI_General *gen_to,*gen_from; 4664 PetscErrorCode ierr; 4665 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4666 Mat_SeqAIJ *b_oth; 4667 VecScatter ctx =a->Mvctx; 4668 MPI_Comm comm; 4669 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4670 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4671 PetscScalar *rvalues,*svalues; 4672 MatScalar *b_otha,*bufa,*bufA; 4673 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4674 MPI_Request *rwaits = NULL,*swaits = NULL; 4675 MPI_Status *sstatus,rstatus; 4676 PetscMPIInt jj,size; 4677 PetscInt *cols,sbs,rbs; 4678 PetscScalar *vals; 4679 4680 PetscFunctionBegin; 4681 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4682 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4683 if (size == 1) PetscFunctionReturn(0); 4684 4685 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 4686 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4687 } 4688 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4689 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4690 4691 gen_to = (VecScatter_MPI_General*)ctx->todata; 4692 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4693 rvalues = gen_from->values; /* holds the length of receiving row */ 4694 svalues = gen_to->values; /* holds the length of sending row */ 4695 nrecvs = gen_from->n; 4696 nsends = gen_to->n; 4697 4698 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 4699 srow = gen_to->indices; /* local row index to be sent */ 4700 sstarts = gen_to->starts; 4701 sprocs = gen_to->procs; 4702 sstatus = gen_to->sstatus; 4703 sbs = gen_to->bs; 4704 rstarts = gen_from->starts; 4705 rprocs = gen_from->procs; 4706 rbs = gen_from->bs; 4707 4708 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4709 if (scall == MAT_INITIAL_MATRIX) { 4710 /* i-array */ 4711 /*---------*/ 4712 /* post receives */ 4713 for (i=0; i<nrecvs; i++) { 4714 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4715 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4716 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4717 } 4718 4719 /* pack the outgoing message */ 4720 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 4721 4722 sstartsj[0] = 0; 4723 rstartsj[0] = 0; 4724 len = 0; /* total length of j or a array to be sent */ 4725 k = 0; 4726 for (i=0; i<nsends; i++) { 4727 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4728 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4729 for (j=0; j<nrows; j++) { 4730 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 4731 for (l=0; l<sbs; l++) { 4732 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 4733 4734 rowlen[j*sbs+l] = ncols; 4735 4736 len += ncols; 4737 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 4738 } 4739 k++; 4740 } 4741 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4742 4743 
      sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}

    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth */
    ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr);

    b_othi[0] = 0;
    len = 0; /* total length of j or a array to be received */
    k = 0;
    for (i=0; i<nrecvs; i++) {
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        len += rowlen[j]; k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }

    /* allocate space for j and a arrays of B_oth */
    ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /* post receives of j-array */
    for (i=0; i<nrecvs; i++) {
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message j-array */
    k = 0;
    for (i=0; i<nsends; i++) {
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap->range[rank];  /* global row idx */
        for (ll=0; ll<sbs; ll++) {
          ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
          for (l=0; l<ncols; l++) {
            *bufJ++ = cols[l];
          }
          ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
        }
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX) {
    sstartsj = *startsj_s;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");

  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++) {
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }

  /* pack the outgoing message a-array */
  k = 0;
  for (i=0; i<nsends; i++) {
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap->range[rank];  /* global row idx */
      for (ll=0; ll<sbs; ll++) {
        ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
        for (l=0; l<ncols; l++) {
          *bufA++ = vals[l];
        }
        ierr =
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 4837 } 4838 } 4839 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4840 } 4841 /* recvs and sends of a-array are completed */ 4842 i = nrecvs; 4843 while (i--) { 4844 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4845 } 4846 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4847 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4848 4849 if (scall == MAT_INITIAL_MATRIX) { 4850 /* put together the new matrix */ 4851 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4852 4853 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4854 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4855 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4856 b_oth->free_a = PETSC_TRUE; 4857 b_oth->free_ij = PETSC_TRUE; 4858 b_oth->nonew = 0; 4859 4860 ierr = PetscFree(bufj);CHKERRQ(ierr); 4861 if (!startsj_s || !bufa_ptr) { 4862 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 4863 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 4864 } else { 4865 *startsj_s = sstartsj; 4866 *startsj_r = rstartsj; 4867 *bufa_ptr = bufa; 4868 } 4869 } 4870 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4871 PetscFunctionReturn(0); 4872 } 4873 4874 #undef __FUNCT__ 4875 #define __FUNCT__ "MatGetCommunicationStructs" 4876 /*@C 4877 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4878 4879 Not Collective 4880 4881 Input Parameters: 4882 . A - The matrix in mpiaij format 4883 4884 Output Parameter: 4885 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4886 . 
colmap - A map from global column index to local index into lvec 4887 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4888 4889 Level: developer 4890 4891 @*/ 4892 #if defined(PETSC_USE_CTABLE) 4893 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4894 #else 4895 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4896 #endif 4897 { 4898 Mat_MPIAIJ *a; 4899 4900 PetscFunctionBegin; 4901 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 4902 PetscValidPointer(lvec, 2); 4903 PetscValidPointer(colmap, 3); 4904 PetscValidPointer(multScatter, 4); 4905 a = (Mat_MPIAIJ*) A->data; 4906 if (lvec) *lvec = a->lvec; 4907 if (colmap) *colmap = a->colmap; 4908 if (multScatter) *multScatter = a->Mvctx; 4909 PetscFunctionReturn(0); 4910 } 4911 4912 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 4913 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 4914 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 4915 #if defined(PETSC_HAVE_ELEMENTAL) 4916 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 4917 #endif 4918 4919 #undef __FUNCT__ 4920 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 4921 /* 4922 Computes (B'*A')' since computing B*A directly is untenable 4923 4924 n p p 4925 ( ) ( ) ( ) 4926 m ( A ) * n ( B ) = m ( C ) 4927 ( ) ( ) ( ) 4928 4929 */ 4930 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 4931 { 4932 PetscErrorCode ierr; 4933 Mat At,Bt,Ct; 4934 4935 PetscFunctionBegin; 4936 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 4937 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 4938 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 4939 ierr = MatDestroy(&At);CHKERRQ(ierr); 4940 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 4941 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 4942 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 4943 PetscFunctionReturn(0); 4944 } 4945 4946 #undef __FUNCT__ 4947 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 4948 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 4949 { 4950 PetscErrorCode ierr; 4951 PetscInt m=A->rmap->n,n=B->cmap->n; 4952 Mat Cmat; 4953 4954 PetscFunctionBegin; 4955 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 4956 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 4957 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4958 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 4959 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 4960 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 4961 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4962 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4963 4964 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 4965 4966 *C = Cmat; 4967 PetscFunctionReturn(0); 4968 } 4969 4970 /* ----------------------------------------------------------------*/ 4971 #undef __FUNCT__ 4972 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 4973 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 4974 { 4975 PetscErrorCode ierr; 4976 4977 PetscFunctionBegin; 4978 if (scall == MAT_INITIAL_MATRIX) { 4979 ierr = 
PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 4980 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 4981 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 4982 } 4983 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 4984 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 4985 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 4986 PetscFunctionReturn(0); 4987 } 4988 4989 /*MC 4990 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 4991 4992 Options Database Keys: 4993 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 4994 4995 Level: beginner 4996 4997 .seealso: MatCreateAIJ() 4998 M*/ 4999 5000 #undef __FUNCT__ 5001 #define __FUNCT__ "MatCreate_MPIAIJ" 5002 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5003 { 5004 Mat_MPIAIJ *b; 5005 PetscErrorCode ierr; 5006 PetscMPIInt size; 5007 5008 PetscFunctionBegin; 5009 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5010 5011 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5012 B->data = (void*)b; 5013 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5014 B->assembled = PETSC_FALSE; 5015 B->insertmode = NOT_SET_VALUES; 5016 b->size = size; 5017 5018 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5019 5020 /* build cache for off array entries formed */ 5021 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5022 5023 b->donotstash = PETSC_FALSE; 5024 b->colmap = 0; 5025 b->garray = 0; 5026 b->roworiented = PETSC_TRUE; 5027 5028 /* stuff used for matrix vector multiply */ 5029 b->lvec = NULL; 5030 b->Mvctx = NULL; 5031 5032 /* stuff for MatGetRow() */ 5033 b->rowindices = 0; 5034 b->rowvalues = 0; 5035 b->getrowactive = PETSC_FALSE; 5036 5037 /* flexible pointer used in CUSP/CUSPARSE classes */ 5038 b->spptr = NULL; 5039 5040 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5041 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5042 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5043 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5044 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5045 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5046 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5047 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5048 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5049 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5050 #if defined(PETSC_HAVE_ELEMENTAL) 5051 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 5052 #endif 5053 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5054 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5055 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5056 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5057 PetscFunctionReturn(0); 5058 } 5059 5060 #undef __FUNCT__ 5061 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5062 /*@C 5063 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5064 and "off-diagonal" part of the matrix in CSR format. 5065 5066 Collective on MPI_Comm 5067 5068 Input Parameters: 5069 + comm - MPI communicator 5070 . m - number of local rows (Cannot be PETSC_DECIDE) 5071 . n - This value should be the same as the local size used in creating the 5072 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5073 calculated if N is given) For square matrices n is almost always m. 5074 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5075 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5076 . i - row indices for "diagonal" portion of matrix 5077 . j - column indices 5078 . a - matrix values 5079 . oi - row indices for "off-diagonal" portion of matrix 5080 . oj - column indices 5081 - oa - matrix values 5082 5083 Output Parameter: 5084 . mat - the matrix 5085 5086 Level: advanced 5087 5088 Notes: 5089 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5090 must free the arrays once the matrix has been destroyed and not before. 5091 5092 The i and j indices are 0 based 5093 5094 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5095 5096 This sets local rows and cannot be used to set off-processor values. 5097 5098 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5099 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5100 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5101 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5102 keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5103 communication if it is known that only local entries will be set. 
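
   Example usage (an illustrative sketch added here, not taken from the PETSc examples; each
   process contributes a single row holding one diagonal entry, and the oj/oa arrays are dummies
   that are never read because oi declares zero off-diagonal entries):

      PetscInt       i[2]  = {0,1}, j[1]  = {0};   /* diagonal block: one entry in local column 0 */
      PetscInt       oi[2] = {0,0}, oj[1] = {0};   /* off-diagonal block: empty                    */
      PetscScalar    a[1]  = {2.0}, oa[1] = {0.0};
      Mat            A;
      PetscErrorCode ierr;

      ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
      /* ... use A; the i, j, a, oi, oj, oa arrays must stay valid until A is destroyed ... */
      ierr = MatDestroy(&A);CHKERRQ(ierr);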
5104 5105 .keywords: matrix, aij, compressed row, sparse, parallel 5106 5107 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5108 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5109 C@*/ 5110 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5111 { 5112 PetscErrorCode ierr; 5113 Mat_MPIAIJ *maij; 5114 5115 PetscFunctionBegin; 5116 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5117 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5118 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5119 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5120 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5121 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5122 maij = (Mat_MPIAIJ*) (*mat)->data; 5123 5124 (*mat)->preallocated = PETSC_TRUE; 5125 5126 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5127 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5128 5129 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5130 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5131 5132 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5133 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5134 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5135 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5136 5137 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5138 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5139 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5140 PetscFunctionReturn(0); 5141 } 5142 5143 /* 5144 Special version for direct calls from Fortran 5145 */ 5146 #include <petsc-private/fortranimpl.h> 5147 5148 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5149 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5150 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5151 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5152 #endif 5153 5154 /* Change these macros so can be used in void function */ 5155 #undef CHKERRQ 5156 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5157 #undef SETERRQ2 5158 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5159 #undef SETERRQ3 5160 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5161 #undef SETERRQ 5162 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5163 5164 #undef __FUNCT__ 5165 #define __FUNCT__ "matsetvaluesmpiaij_" 5166 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5167 { 5168 Mat mat = *mmat; 5169 PetscInt m = *mm, n = *mn; 5170 InsertMode addv = *maddv; 5171 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5172 PetscScalar value; 5173 PetscErrorCode ierr; 5174 5175 MatCheckPreallocated(mat,1); 5176 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5177 5178 #if defined(PETSC_USE_DEBUG) 5179 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5180 #endif 5181 { 5182 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5183 
PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5184 PetscBool roworiented = aij->roworiented; 5185 5186 /* Some Variables required in the macro */ 5187 Mat A = aij->A; 5188 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5189 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5190 MatScalar *aa = a->a; 5191 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5192 Mat B = aij->B; 5193 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5194 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5195 MatScalar *ba = b->a; 5196 5197 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5198 PetscInt nonew = a->nonew; 5199 MatScalar *ap1,*ap2; 5200 5201 PetscFunctionBegin; 5202 for (i=0; i<m; i++) { 5203 if (im[i] < 0) continue; 5204 #if defined(PETSC_USE_DEBUG) 5205 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5206 #endif 5207 if (im[i] >= rstart && im[i] < rend) { 5208 row = im[i] - rstart; 5209 lastcol1 = -1; 5210 rp1 = aj + ai[row]; 5211 ap1 = aa + ai[row]; 5212 rmax1 = aimax[row]; 5213 nrow1 = ailen[row]; 5214 low1 = 0; 5215 high1 = nrow1; 5216 lastcol2 = -1; 5217 rp2 = bj + bi[row]; 5218 ap2 = ba + bi[row]; 5219 rmax2 = bimax[row]; 5220 nrow2 = bilen[row]; 5221 low2 = 0; 5222 high2 = nrow2; 5223 5224 for (j=0; j<n; j++) { 5225 if (roworiented) value = v[i*n+j]; 5226 else value = v[i+j*m]; 5227 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5228 if (in[j] >= cstart && in[j] < cend) { 5229 col = in[j] - cstart; 5230 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5231 } else if (in[j] < 0) continue; 5232 #if defined(PETSC_USE_DEBUG) 5233 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5234 #endif 5235 else { 5236 if (mat->was_assembled) { 5237 if (!aij->colmap) { 5238 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5239 } 5240 #if defined(PETSC_USE_CTABLE) 5241 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5242 col--; 5243 #else 5244 col = aij->colmap[in[j]] - 1; 5245 #endif 5246 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5247 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5248 col = in[j]; 5249 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5250 B = aij->B; 5251 b = (Mat_SeqAIJ*)B->data; 5252 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5253 rp2 = bj + bi[row]; 5254 ap2 = ba + bi[row]; 5255 rmax2 = bimax[row]; 5256 nrow2 = bilen[row]; 5257 low2 = 0; 5258 high2 = nrow2; 5259 bm = aij->B->rmap->n; 5260 ba = b->a; 5261 } 5262 } else col = in[j]; 5263 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5264 } 5265 } 5266 } else if (!aij->donotstash) { 5267 if (roworiented) { 5268 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5269 } else { 5270 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5271 } 5272 } 5273 } 5274 } 5275 PetscFunctionReturnVoid(); 5276 } 5277 5278
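
/*
   Usage sketch for MatCreateMPIAIJSumSeqAIJ(), referenced from its manual page above. This is an
   illustration only: seqA stands for any SeqAIJ matrix of identical global dimensions assembled
   on every process; it is not a matrix defined in this file.

     Mat seqA,C;
     ... assemble seqA on each process ...
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
     ... change the numerical values of seqA, keeping its nonzero pattern fixed ...
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
*/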
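
/*
   Usage sketch for MatMPIAIJGetLocalMat(), referenced from its manual page above. Illustration
   only: A stands for any assembled MPIAIJ matrix, and it is assumed the caller destroys A_loc
   when it is no longer needed.

     Mat A_loc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);  /* local rows of A, all global columns */
     ... use the sequential matrix A_loc ...
     ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);    /* refresh the values after A changes */
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
*/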