#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc-private/vecimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also
   automatically switches over to use inodes when enough of them exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/
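/*
   Illustrative sketch only (not part of the API defined in this file): the MATAIJ manual page above
   recommends calling both preallocation routines so the same code runs unchanged on one or many
   processes.  The guard macro, function name, and all size/nnz arguments below are hypothetical.
*/
#if defined(PETSC_MATAIJ_USAGE_SKETCH)
static PetscErrorCode ExampleCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt dnnz[],const PetscInt onnz[],Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATAIJ);CHKERRQ(ierr);
  ierr = MatSetFromOptions(*A);CHKERRQ(ierr);                       /* lets -mat_type aij (or a subclass) take effect */
  ierr = MatSeqAIJSetPreallocation(*A,0,dnnz);CHKERRQ(ierr);        /* used when the communicator has a single process */
  ierr = MatMPIAIJSetPreallocation(*A,0,dnnz,0,onnz);CHKERRQ(ierr); /* used when it controls multiple processes */
  PetscFunctionReturn(0);
}
#endif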
#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalSet_MPIAIJ"
PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*) Y->data;

  PetscFunctionBegin;
  if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
    ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
  } else {
    ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }

  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

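/*
   Illustrative call pattern for MatDistribute_MPIAIJ() below (hypothetical variable names; not part of
   this file): a MATSEQAIJ matrix gmat held on rank 0 is spread over comm, with this rank receiving m rows;
   a later call with MAT_REUSE_MATRIX and the same dmat moves over new numerical values only.

     Mat dmat;
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);
*/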
#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    Used by a preconditioner, hence PETSC_EXTERN
*/
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal entries per row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal entries per row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it, it is not scalable (each processor
  has an order N integer array, but access is fast).
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble.
*/ 708 /* 709 if nonzero structure of submatrix B cannot change then we know that 710 no processor disassembled thus we can skip this stuff 711 */ 712 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 713 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 714 if (mat->was_assembled && !other_disassembled) { 715 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 716 } 717 } 718 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 719 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 720 } 721 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 722 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 723 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 724 725 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 726 727 aij->rowvalues = 0; 728 729 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 730 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 731 732 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 733 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 734 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 735 ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 736 } 737 PetscFunctionReturn(0); 738 } 739 740 #undef __FUNCT__ 741 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 742 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 743 { 744 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 745 PetscErrorCode ierr; 746 747 PetscFunctionBegin; 748 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 749 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 750 PetscFunctionReturn(0); 751 } 752 753 #undef __FUNCT__ 754 #define __FUNCT__ "MatZeroRows_MPIAIJ" 755 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 756 { 757 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 758 PetscInt *owners = A->rmap->range; 759 PetscInt n = A->rmap->n; 760 PetscSF sf; 761 PetscInt *lrows; 762 PetscSFNode *rrows; 763 PetscInt r, p = 0, len = 0; 764 PetscErrorCode ierr; 765 766 PetscFunctionBegin; 767 /* Create SF where leaves are input rows and roots are owned rows */ 768 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 769 for (r = 0; r < n; ++r) lrows[r] = -1; 770 if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);} 771 for (r = 0; r < N; ++r) { 772 const PetscInt idx = rows[r]; 773 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 774 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 775 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 776 } 777 if (A->nooffproczerorows) { 778 if (p != mat->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,mat->rank); 779 lrows[len++] = idx - owners[p]; 780 } else { 781 rrows[r].rank = p; 782 rrows[r].index = rows[r] - owners[p]; 783 } 784 } 785 if (!A->nooffproczerorows) { 786 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 787 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 788 /* Collect flags for rows to be zeroed */ 789 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr); 790 ierr = 
PetscSFReduceEnd(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr); 791 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 792 /* Compress and put in row numbers */ 793 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 794 } 795 /* fix right hand side if needed */ 796 if (x && b) { 797 const PetscScalar *xx; 798 PetscScalar *bb; 799 800 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 801 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 802 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 803 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 804 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 805 } 806 /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/ 807 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 808 if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) { 809 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 810 } else if (diag != 0.0) { 811 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 812 if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 813 for (r = 0; r < len; ++r) { 814 const PetscInt row = lrows[r] + A->rmap->rstart; 815 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 816 } 817 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 818 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 819 } else { 820 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 821 } 822 ierr = PetscFree(lrows);CHKERRQ(ierr); 823 824 /* only change matrix nonzero state if pattern was allowed to be changed */ 825 if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) { 826 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 827 ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 828 } 829 PetscFunctionReturn(0); 830 } 831 832 #undef __FUNCT__ 833 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 834 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 835 { 836 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 837 PetscErrorCode ierr; 838 PetscMPIInt n = A->rmap->n; 839 PetscInt i,j,r,m,p = 0,len = 0; 840 PetscInt *lrows,*owners = A->rmap->range; 841 PetscSFNode *rrows; 842 PetscSF sf; 843 const PetscScalar *xx; 844 PetscScalar *bb,*mask; 845 Vec xmask,lmask; 846 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 847 const PetscInt *aj, *ii,*ridx; 848 PetscScalar *aa; 849 850 PetscFunctionBegin; 851 /* Create SF where leaves are input rows and roots are owned rows */ 852 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 853 for (r = 0; r < n; ++r) lrows[r] = -1; 854 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 855 for (r = 0; r < N; ++r) { 856 const PetscInt idx = rows[r]; 857 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 858 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 859 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 860 } 861 rrows[r].rank = p; 862 rrows[r].index = rows[r] - owners[p]; 863 } 864 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 865 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, 
PETSC_OWN_POINTER);CHKERRQ(ierr); 866 /* Collect flags for rows to be zeroed */ 867 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 868 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 869 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 870 /* Compress and put in row numbers */ 871 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 872 /* zero diagonal part of matrix */ 873 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 874 /* handle off diagonal part of matrix */ 875 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 876 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 877 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 878 for (i=0; i<len; i++) bb[lrows[i]] = 1; 879 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 880 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 881 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 882 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 883 if (x) { 884 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 885 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 886 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 887 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 888 } 889 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 890 /* remove zeroed rows of off diagonal matrix */ 891 ii = aij->i; 892 for (i=0; i<len; i++) { 893 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 894 } 895 /* loop over all elements of off process part of matrix zeroing removed columns*/ 896 if (aij->compressedrow.use) { 897 m = aij->compressedrow.nrows; 898 ii = aij->compressedrow.i; 899 ridx = aij->compressedrow.rindex; 900 for (i=0; i<m; i++) { 901 n = ii[i+1] - ii[i]; 902 aj = aij->j + ii[i]; 903 aa = aij->a + ii[i]; 904 905 for (j=0; j<n; j++) { 906 if (PetscAbsScalar(mask[*aj])) { 907 if (b) bb[*ridx] -= *aa*xx[*aj]; 908 *aa = 0.0; 909 } 910 aa++; 911 aj++; 912 } 913 ridx++; 914 } 915 } else { /* do not use compressed row format */ 916 m = l->B->rmap->n; 917 for (i=0; i<m; i++) { 918 n = ii[i+1] - ii[i]; 919 aj = aij->j + ii[i]; 920 aa = aij->a + ii[i]; 921 for (j=0; j<n; j++) { 922 if (PetscAbsScalar(mask[*aj])) { 923 if (b) bb[i] -= *aa*xx[*aj]; 924 *aa = 0.0; 925 } 926 aa++; 927 aj++; 928 } 929 } 930 } 931 if (x) { 932 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 933 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 934 } 935 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 936 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 937 ierr = PetscFree(lrows);CHKERRQ(ierr); 938 939 /* only change matrix nonzero state if pattern was allowed to be changed */ 940 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 941 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 942 ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 943 } 944 PetscFunctionReturn(0); 945 } 946 947 #undef __FUNCT__ 948 #define __FUNCT__ "MatMult_MPIAIJ" 949 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 950 { 951 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 952 PetscErrorCode ierr; 953 PetscInt nt; 954 955 PetscFunctionBegin; 956 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 957 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 958 ierr = 
VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 959 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 960 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 961 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 962 PetscFunctionReturn(0); 963 } 964 965 #undef __FUNCT__ 966 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 967 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 968 { 969 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 970 PetscErrorCode ierr; 971 972 PetscFunctionBegin; 973 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 974 PetscFunctionReturn(0); 975 } 976 977 #undef __FUNCT__ 978 #define __FUNCT__ "MatMultAdd_MPIAIJ" 979 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 980 { 981 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 982 PetscErrorCode ierr; 983 984 PetscFunctionBegin; 985 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 986 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 987 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 988 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 989 PetscFunctionReturn(0); 990 } 991 992 #undef __FUNCT__ 993 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 994 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 995 { 996 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 997 PetscErrorCode ierr; 998 PetscBool merged; 999 1000 PetscFunctionBegin; 1001 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1002 /* do nondiagonal part */ 1003 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1004 if (!merged) { 1005 /* send it on its way */ 1006 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1007 /* do local part */ 1008 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1009 /* receive remote parts: note this assumes the values are not actually */ 1010 /* added in yy until the next line, */ 1011 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1012 } else { 1013 /* do local part */ 1014 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1015 /* send it on its way */ 1016 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1017 /* values actually were received in the Begin() but we need to call this nop */ 1018 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1019 } 1020 PetscFunctionReturn(0); 1021 } 1022 1023 #undef __FUNCT__ 1024 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1025 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1026 { 1027 MPI_Comm comm; 1028 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1029 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1030 IS Me,Notme; 1031 PetscErrorCode ierr; 1032 PetscInt M,N,first,last,*notme,i; 1033 PetscMPIInt size; 1034 1035 PetscFunctionBegin; 1036 /* Easy test: symmetric diagonal block */ 1037 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1038 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1039 if (!*f) PetscFunctionReturn(0); 1040 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1041 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1042 if (size == 1) PetscFunctionReturn(0); 1043 1044 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. 
*/ 1045 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1046 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1047 ierr = PetscMalloc1((N-last+first),¬me);CHKERRQ(ierr); 1048 for (i=0; i<first; i++) notme[i] = i; 1049 for (i=last; i<M; i++) notme[i-last+first] = i; 1050 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1051 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1052 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1053 Aoff = Aoffs[0]; 1054 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1055 Boff = Boffs[0]; 1056 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1057 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1058 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1059 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1060 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1061 ierr = PetscFree(notme);CHKERRQ(ierr); 1062 PetscFunctionReturn(0); 1063 } 1064 1065 #undef __FUNCT__ 1066 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1067 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1068 { 1069 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1070 PetscErrorCode ierr; 1071 1072 PetscFunctionBegin; 1073 /* do nondiagonal part */ 1074 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1075 /* send it on its way */ 1076 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1077 /* do local part */ 1078 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1079 /* receive remote parts */ 1080 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1081 PetscFunctionReturn(0); 1082 } 1083 1084 /* 1085 This only works correctly for square matrices where the subblock A->A is the 1086 diagonal block 1087 */ 1088 #undef __FUNCT__ 1089 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1090 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1091 { 1092 PetscErrorCode ierr; 1093 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1094 1095 PetscFunctionBegin; 1096 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1097 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1098 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1099 PetscFunctionReturn(0); 1100 } 1101 1102 #undef __FUNCT__ 1103 #define __FUNCT__ "MatScale_MPIAIJ" 1104 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1105 { 1106 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1107 PetscErrorCode ierr; 1108 1109 PetscFunctionBegin; 1110 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1111 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1112 PetscFunctionReturn(0); 1113 } 1114 1115 #undef __FUNCT__ 1116 #define __FUNCT__ "MatDestroy_MPIAIJ" 1117 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1118 { 1119 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1120 PetscErrorCode ierr; 1121 1122 PetscFunctionBegin; 1123 #if defined(PETSC_USE_LOG) 1124 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1125 #endif 1126 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1127 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1128 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1129 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1130 #if defined(PETSC_USE_CTABLE) 1131 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1132 #else 1133 ierr = 
PetscFree(aij->colmap);CHKERRQ(ierr); 1134 #endif 1135 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1136 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1137 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1138 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1139 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1140 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1141 1142 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1143 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1144 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1145 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 1146 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1147 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1148 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1149 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1150 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1151 #if defined(PETSC_HAVE_ELEMENTAL) 1152 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1153 #endif 1154 PetscFunctionReturn(0); 1155 } 1156 1157 #undef __FUNCT__ 1158 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1159 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1160 { 1161 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1162 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1163 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1164 PetscErrorCode ierr; 1165 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1166 int fd; 1167 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1168 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1169 PetscScalar *column_values; 1170 PetscInt message_count,flowcontrolcount; 1171 FILE *file; 1172 1173 PetscFunctionBegin; 1174 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1175 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1176 nz = A->nz + B->nz; 1177 if (!rank) { 1178 header[0] = MAT_FILE_CLASSID; 1179 header[1] = mat->rmap->N; 1180 header[2] = mat->cmap->N; 1181 1182 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1183 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1184 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1185 /* get largest number of rows any processor has */ 1186 rlen = mat->rmap->n; 1187 range = mat->rmap->range; 1188 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1189 } else { 1190 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1191 rlen = mat->rmap->n; 1192 } 1193 1194 /* load up the local row counts */ 1195 ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr); 1196 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1197 1198 /* store the row lengths to the file */ 1199 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1200 if (!rank) { 1201 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1202 for (i=1; i<size; i++) { 1203 ierr = 
PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1204 rlen = range[i+1] - range[i]; 1205 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1206 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1207 } 1208 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1209 } else { 1210 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1211 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1212 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1213 } 1214 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1215 1216 /* load up the local column indices */ 1217 nzmax = nz; /* th processor needs space a largest processor needs */ 1218 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1219 ierr = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr); 1220 cnt = 0; 1221 for (i=0; i<mat->rmap->n; i++) { 1222 for (j=B->i[i]; j<B->i[i+1]; j++) { 1223 if ((col = garray[B->j[j]]) > cstart) break; 1224 column_indices[cnt++] = col; 1225 } 1226 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1227 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1228 } 1229 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1230 1231 /* store the column indices to the file */ 1232 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1233 if (!rank) { 1234 MPI_Status status; 1235 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1236 for (i=1; i<size; i++) { 1237 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1238 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1239 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1240 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1241 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1242 } 1243 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1244 } else { 1245 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1246 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1247 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1248 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1249 } 1250 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1251 1252 /* load up the local column values */ 1253 ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr); 1254 cnt = 0; 1255 for (i=0; i<mat->rmap->n; i++) { 1256 for (j=B->i[i]; j<B->i[i+1]; j++) { 1257 if (garray[B->j[j]] > cstart) break; 1258 column_values[cnt++] = B->a[j]; 1259 } 1260 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1261 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1262 } 1263 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1264 1265 /* store the column values to the file */ 1266 ierr = 
PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1267 if (!rank) { 1268 MPI_Status status; 1269 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1270 for (i=1; i<size; i++) { 1271 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1272 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1273 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1274 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1275 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1276 } 1277 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1278 } else { 1279 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1280 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1281 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1282 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1283 } 1284 ierr = PetscFree(column_values);CHKERRQ(ierr); 1285 1286 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1287 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1288 PetscFunctionReturn(0); 1289 } 1290 1291 #include <petscdraw.h> 1292 #undef __FUNCT__ 1293 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1294 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1295 { 1296 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1297 PetscErrorCode ierr; 1298 PetscMPIInt rank = aij->rank,size = aij->size; 1299 PetscBool isdraw,iascii,isbinary; 1300 PetscViewer sviewer; 1301 PetscViewerFormat format; 1302 1303 PetscFunctionBegin; 1304 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1305 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1306 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1307 if (iascii) { 1308 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1309 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1310 MatInfo info; 1311 PetscBool inodes; 1312 1313 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1314 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1315 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1316 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1317 if (!inodes) { 1318 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1319 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1320 } else { 1321 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1322 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1323 } 1324 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1325 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1326 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1327 ierr = 
PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1328 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1329 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1330 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1331 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1332 PetscFunctionReturn(0); 1333 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1334 PetscInt inodecount,inodelimit,*inodes; 1335 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1336 if (inodes) { 1337 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1338 } else { 1339 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1340 } 1341 PetscFunctionReturn(0); 1342 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1343 PetscFunctionReturn(0); 1344 } 1345 } else if (isbinary) { 1346 if (size == 1) { 1347 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1348 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1349 } else { 1350 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1351 } 1352 PetscFunctionReturn(0); 1353 } else if (isdraw) { 1354 PetscDraw draw; 1355 PetscBool isnull; 1356 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1357 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1358 } 1359 1360 { 1361 /* assemble the entire matrix onto first processor. */ 1362 Mat A; 1363 Mat_SeqAIJ *Aloc; 1364 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1365 MatScalar *a; 1366 1367 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1368 if (!rank) { 1369 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1370 } else { 1371 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1372 } 1373 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1374 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1375 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1376 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1377 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1378 1379 /* copy over the A part */ 1380 Aloc = (Mat_SeqAIJ*)aij->A->data; 1381 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1382 row = mat->rmap->rstart; 1383 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1384 for (i=0; i<m; i++) { 1385 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1386 row++; 1387 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1388 } 1389 aj = Aloc->j; 1390 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1391 1392 /* copy over the B part */ 1393 Aloc = (Mat_SeqAIJ*)aij->B->data; 1394 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1395 row = mat->rmap->rstart; 1396 ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr); 1397 ct = cols; 1398 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1399 for (i=0; i<m; i++) { 1400 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1401 row++; 1402 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1403 } 1404 ierr = PetscFree(ct);CHKERRQ(ierr); 1405 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1406 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1407 /* 
1408 Everyone has to call to draw the matrix since the graphics waits are 1409 synchronized across all processors that share the PetscDraw object 1410 */ 1411 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1412 if (!rank) { 1413 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1414 } 1415 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1416 ierr = MatDestroy(&A);CHKERRQ(ierr); 1417 } 1418 PetscFunctionReturn(0); 1419 } 1420 1421 #undef __FUNCT__ 1422 #define __FUNCT__ "MatView_MPIAIJ" 1423 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1424 { 1425 PetscErrorCode ierr; 1426 PetscBool iascii,isdraw,issocket,isbinary; 1427 1428 PetscFunctionBegin; 1429 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1430 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1431 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1432 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1433 if (iascii || isdraw || isbinary || issocket) { 1434 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1435 } 1436 PetscFunctionReturn(0); 1437 } 1438 1439 #undef __FUNCT__ 1440 #define __FUNCT__ "MatSOR_MPIAIJ" 1441 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1442 { 1443 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1444 PetscErrorCode ierr; 1445 Vec bb1 = 0; 1446 PetscBool hasop; 1447 1448 PetscFunctionBegin; 1449 if (flag == SOR_APPLY_UPPER) { 1450 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1451 PetscFunctionReturn(0); 1452 } 1453 1454 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1455 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1456 } 1457 1458 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1459 if (flag & SOR_ZERO_INITIAL_GUESS) { 1460 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1461 its--; 1462 } 1463 1464 while (its--) { 1465 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1466 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1467 1468 /* update rhs: bb1 = bb - B*x */ 1469 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1470 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1471 1472 /* local sweep */ 1473 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1474 } 1475 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1476 if (flag & SOR_ZERO_INITIAL_GUESS) { 1477 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1478 its--; 1479 } 1480 while (its--) { 1481 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1482 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1483 1484 /* update rhs: bb1 = bb - B*x */ 1485 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1486 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1487 1488 /* local sweep */ 1489 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1490 } 1491 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1492 if (flag & SOR_ZERO_INITIAL_GUESS) { 1493 ierr = 
(*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1494 its--; 1495 } 1496 while (its--) { 1497 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1498 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1499 1500 /* update rhs: bb1 = bb - B*x */ 1501 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1502 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1503 1504 /* local sweep */ 1505 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1506 } 1507 } else if (flag & SOR_EISENSTAT) { 1508 Vec xx1; 1509 1510 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1511 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1512 1513 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1514 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1515 if (!mat->diag) { 1516 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1517 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1518 } 1519 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1520 if (hasop) { 1521 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1522 } else { 1523 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1524 } 1525 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1526 1527 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1528 1529 /* local sweep */ 1530 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1531 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1532 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1533 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1534 1535 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1536 PetscFunctionReturn(0); 1537 } 1538 1539 #undef __FUNCT__ 1540 #define __FUNCT__ "MatPermute_MPIAIJ" 1541 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1542 { 1543 Mat aA,aB,Aperm; 1544 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1545 PetscScalar *aa,*ba; 1546 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1547 PetscSF rowsf,sf; 1548 IS parcolp = NULL; 1549 PetscBool done; 1550 PetscErrorCode ierr; 1551 1552 PetscFunctionBegin; 1553 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1554 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1555 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1556 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1557 1558 /* Invert row permutation to find out where my rows should go */ 1559 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1560 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1561 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1562 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1563 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1564 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1565 1566 /* Invert column permutation to find out where my columns should go */ 1567 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1568 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1569 
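/*
     Sketch of the inversion idiom used here (and for the rows above): leaf i of the SF points
     at the old global index the caller wants at position i, so reducing the identity map
     (work[i] = start + i) onto the roots with MPIU_REPLACE leaves, in rdest[j]/cdest[j], the
     new global position of locally owned row/column j.  For example, with two processes each
     owning two columns (values purely illustrative):

         cwant = {2,0} on rank 0 and {3,1} on rank 1
      => cdest = {1,3} on rank 0 and {0,2} on rank 1

     i.e. old global column 0 moves to position 1, column 1 to 3, column 2 to 0, column 3 to 2.
  */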
ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1570 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1571 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1572 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1573 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1574 1575 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1576 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1577 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1578 1579 /* Find out where my gcols should go */ 1580 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1581 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1582 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1583 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1584 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1585 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1586 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1587 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1588 1589 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1590 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1591 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1592 for (i=0; i<m; i++) { 1593 PetscInt row = rdest[i],rowner; 1594 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1595 for (j=ai[i]; j<ai[i+1]; j++) { 1596 PetscInt cowner,col = cdest[aj[j]]; 1597 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1598 if (rowner == cowner) dnnz[i]++; 1599 else onnz[i]++; 1600 } 1601 for (j=bi[i]; j<bi[i+1]; j++) { 1602 PetscInt cowner,col = gcdest[bj[j]]; 1603 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1604 if (rowner == cowner) dnnz[i]++; 1605 else onnz[i]++; 1606 } 1607 } 1608 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1609 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1610 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1611 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1612 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1613 1614 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1615 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1616 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1617 for (i=0; i<m; i++) { 1618 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1619 PetscInt j0,rowlen; 1620 rowlen = ai[i+1] - ai[i]; 1621 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1622 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1623 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1624 } 1625 rowlen = bi[i+1] - bi[i]; 1626 for (j0=j=0; j<rowlen; j0=j) { 1627 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1628 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1629 } 1630 } 1631 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1632 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1633 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1634 ierr = 
MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1635 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1636 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1637 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1638 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1639 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1640 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1641 *B = Aperm; 1642 PetscFunctionReturn(0); 1643 } 1644 1645 #undef __FUNCT__ 1646 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1647 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1648 { 1649 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1650 Mat A = mat->A,B = mat->B; 1651 PetscErrorCode ierr; 1652 PetscReal isend[5],irecv[5]; 1653 1654 PetscFunctionBegin; 1655 info->block_size = 1.0; 1656 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1657 1658 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1659 isend[3] = info->memory; isend[4] = info->mallocs; 1660 1661 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1662 1663 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1664 isend[3] += info->memory; isend[4] += info->mallocs; 1665 if (flag == MAT_LOCAL) { 1666 info->nz_used = isend[0]; 1667 info->nz_allocated = isend[1]; 1668 info->nz_unneeded = isend[2]; 1669 info->memory = isend[3]; 1670 info->mallocs = isend[4]; 1671 } else if (flag == MAT_GLOBAL_MAX) { 1672 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1673 1674 info->nz_used = irecv[0]; 1675 info->nz_allocated = irecv[1]; 1676 info->nz_unneeded = irecv[2]; 1677 info->memory = irecv[3]; 1678 info->mallocs = irecv[4]; 1679 } else if (flag == MAT_GLOBAL_SUM) { 1680 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1681 1682 info->nz_used = irecv[0]; 1683 info->nz_allocated = irecv[1]; 1684 info->nz_unneeded = irecv[2]; 1685 info->memory = irecv[3]; 1686 info->mallocs = irecv[4]; 1687 } 1688 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1689 info->fill_ratio_needed = 0; 1690 info->factor_mallocs = 0; 1691 PetscFunctionReturn(0); 1692 } 1693 1694 #undef __FUNCT__ 1695 #define __FUNCT__ "MatSetOption_MPIAIJ" 1696 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1697 { 1698 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1699 PetscErrorCode ierr; 1700 1701 PetscFunctionBegin; 1702 switch (op) { 1703 case MAT_NEW_NONZERO_LOCATIONS: 1704 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1705 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1706 case MAT_KEEP_NONZERO_PATTERN: 1707 case MAT_NEW_NONZERO_LOCATION_ERR: 1708 case MAT_USE_INODES: 1709 case MAT_IGNORE_ZERO_ENTRIES: 1710 MatCheckPreallocated(A,1); 1711 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1712 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1713 break; 1714 case MAT_ROW_ORIENTED: 1715 a->roworiented = flg; 1716 1717 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1718 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1719 break; 1720 case MAT_NEW_DIAGONALS: 1721 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1722 break; 1723 case MAT_IGNORE_OFF_PROC_ENTRIES: 1724 a->donotstash = flg; 1725 break; 1726 case MAT_SPD: 1727 A->spd_set = PETSC_TRUE; 1728 A->spd = flg; 1729 if (flg) { 1730 A->symmetric = PETSC_TRUE; 1731 A->structurally_symmetric = PETSC_TRUE; 1732 A->symmetric_set = PETSC_TRUE; 1733 A->structurally_symmetric_set = 
PETSC_TRUE; 1734 } 1735 break; 1736 case MAT_SYMMETRIC: 1737 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1738 break; 1739 case MAT_STRUCTURALLY_SYMMETRIC: 1740 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1741 break; 1742 case MAT_HERMITIAN: 1743 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1744 break; 1745 case MAT_SYMMETRY_ETERNAL: 1746 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1747 break; 1748 default: 1749 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1750 } 1751 PetscFunctionReturn(0); 1752 } 1753 1754 #undef __FUNCT__ 1755 #define __FUNCT__ "MatGetRow_MPIAIJ" 1756 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1757 { 1758 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1759 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1760 PetscErrorCode ierr; 1761 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1762 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1763 PetscInt *cmap,*idx_p; 1764 1765 PetscFunctionBegin; 1766 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1767 mat->getrowactive = PETSC_TRUE; 1768 1769 if (!mat->rowvalues && (idx || v)) { 1770 /* 1771 allocate enough space to hold information from the longest row. 1772 */ 1773 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1774 PetscInt max = 1,tmp; 1775 for (i=0; i<matin->rmap->n; i++) { 1776 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1777 if (max < tmp) max = tmp; 1778 } 1779 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1780 } 1781 1782 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1783 lrow = row - rstart; 1784 1785 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1786 if (!v) {pvA = 0; pvB = 0;} 1787 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1788 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1789 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1790 nztot = nzA + nzB; 1791 1792 cmap = mat->garray; 1793 if (v || idx) { 1794 if (nztot) { 1795 /* Sort by increasing column numbers, assuming A and B already sorted */ 1796 PetscInt imark = -1; 1797 if (v) { 1798 *v = v_p = mat->rowvalues; 1799 for (i=0; i<nzB; i++) { 1800 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1801 else break; 1802 } 1803 imark = i; 1804 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1805 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1806 } 1807 if (idx) { 1808 *idx = idx_p = mat->rowindices; 1809 if (imark > -1) { 1810 for (i=0; i<imark; i++) { 1811 idx_p[i] = cmap[cworkB[i]]; 1812 } 1813 } else { 1814 for (i=0; i<nzB; i++) { 1815 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1816 else break; 1817 } 1818 imark = i; 1819 } 1820 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1821 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1822 } 1823 } else { 1824 if (idx) *idx = 0; 1825 if (v) *v = 0; 1826 } 1827 } 1828 *nz = nztot; 1829 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1830 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1831 PetscFunctionReturn(0); 1832 } 1833 1834 #undef __FUNCT__ 1835 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1836 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1837 { 1838 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1839 1840 
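/*
     MatGetRow_MPIAIJ() above returns pointers into mat->rowvalues/mat->rowindices, so each row
     must be restored before the next one is requested.  A minimal usage sketch from the caller's
     side (loop and variable names are illustrative only):

        PetscInt          row,nz,rstart,rend;
        const PetscInt    *cols;
        const PetscScalar *vals;
        ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
        for (row=rstart; row<rend; row++) {
          ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
          ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
        }
  */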
PetscFunctionBegin; 1841 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1842 aij->getrowactive = PETSC_FALSE; 1843 PetscFunctionReturn(0); 1844 } 1845 1846 #undef __FUNCT__ 1847 #define __FUNCT__ "MatNorm_MPIAIJ" 1848 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1849 { 1850 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1851 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1852 PetscErrorCode ierr; 1853 PetscInt i,j,cstart = mat->cmap->rstart; 1854 PetscReal sum = 0.0; 1855 MatScalar *v; 1856 1857 PetscFunctionBegin; 1858 if (aij->size == 1) { 1859 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1860 } else { 1861 if (type == NORM_FROBENIUS) { 1862 v = amat->a; 1863 for (i=0; i<amat->nz; i++) { 1864 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1865 } 1866 v = bmat->a; 1867 for (i=0; i<bmat->nz; i++) { 1868 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1869 } 1870 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1871 *norm = PetscSqrtReal(*norm); 1872 } else if (type == NORM_1) { /* max column norm */ 1873 PetscReal *tmp,*tmp2; 1874 PetscInt *jj,*garray = aij->garray; 1875 ierr = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr); 1876 ierr = PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr); 1877 *norm = 0.0; 1878 v = amat->a; jj = amat->j; 1879 for (j=0; j<amat->nz; j++) { 1880 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1881 } 1882 v = bmat->a; jj = bmat->j; 1883 for (j=0; j<bmat->nz; j++) { 1884 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1885 } 1886 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1887 for (j=0; j<mat->cmap->N; j++) { 1888 if (tmp2[j] > *norm) *norm = tmp2[j]; 1889 } 1890 ierr = PetscFree(tmp);CHKERRQ(ierr); 1891 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1892 } else if (type == NORM_INFINITY) { /* max row norm */ 1893 PetscReal ntemp = 0.0; 1894 for (j=0; j<aij->A->rmap->n; j++) { 1895 v = amat->a + amat->i[j]; 1896 sum = 0.0; 1897 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1898 sum += PetscAbsScalar(*v); v++; 1899 } 1900 v = bmat->a + bmat->i[j]; 1901 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1902 sum += PetscAbsScalar(*v); v++; 1903 } 1904 if (sum > ntemp) ntemp = sum; 1905 } 1906 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1907 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1908 } 1909 PetscFunctionReturn(0); 1910 } 1911 1912 #undef __FUNCT__ 1913 #define __FUNCT__ "MatTranspose_MPIAIJ" 1914 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1915 { 1916 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1917 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1918 PetscErrorCode ierr; 1919 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1920 PetscInt cstart = A->cmap->rstart,ncol; 1921 Mat B; 1922 MatScalar *array; 1923 1924 PetscFunctionBegin; 1925 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1926 1927 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1928 ai = Aloc->i; aj = Aloc->j; 1929 bi = Bloc->i; bj = Bloc->j; 1930 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1931 PetscInt *d_nnz,*g_nnz,*o_nnz; 1932 PetscSFNode *oloc; 1933 
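/*
     Preallocation for the transpose, roughly: local row j of B (= global column cstart+j of A)
     receives d_nnz[j] entries from this process' diagonal block, counted directly below, and
     o_nnz[j] entries coming from the off-diagonal blocks of the other processes; the latter are
     obtained by SUM-reducing every process' per-column counts g_nnz onto the owner of each
     global column through a PetscSF built from garray.
  */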
PETSC_UNUSED PetscSF sf; 1934 1935 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1936 /* compute d_nnz for preallocation */ 1937 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1938 for (i=0; i<ai[ma]; i++) { 1939 d_nnz[aj[i]]++; 1940 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1941 } 1942 /* compute local off-diagonal contributions */ 1943 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1944 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1945 /* map those to global */ 1946 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1947 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1948 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1949 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1950 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1951 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1952 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1953 1954 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1955 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1956 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1957 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1958 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1959 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 1960 } else { 1961 B = *matout; 1962 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1963 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1964 } 1965 1966 /* copy over the A part */ 1967 array = Aloc->a; 1968 row = A->rmap->rstart; 1969 for (i=0; i<ma; i++) { 1970 ncol = ai[i+1]-ai[i]; 1971 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1972 row++; 1973 array += ncol; aj += ncol; 1974 } 1975 aj = Aloc->j; 1976 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1977 1978 /* copy over the B part */ 1979 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 1980 array = Bloc->a; 1981 row = A->rmap->rstart; 1982 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1983 cols_tmp = cols; 1984 for (i=0; i<mb; i++) { 1985 ncol = bi[i+1]-bi[i]; 1986 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1987 row++; 1988 array += ncol; cols_tmp += ncol; 1989 } 1990 ierr = PetscFree(cols);CHKERRQ(ierr); 1991 1992 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1993 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1994 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1995 *matout = B; 1996 } else { 1997 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 1998 } 1999 PetscFunctionReturn(0); 2000 } 2001 2002 #undef __FUNCT__ 2003 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2004 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2005 { 2006 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2007 Mat a = aij->A,b = aij->B; 2008 PetscErrorCode ierr; 2009 PetscInt s1,s2,s3; 2010 2011 PetscFunctionBegin; 2012 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2013 if (rr) { 2014 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2015 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2016 /* Overlap communication with computation. 
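     The forward scatter of rr into aij->lvec is only started here; the diagonal block
     (and the left scaling of the off-diagonal block) is processed while the messages are
     in flight, and the scatter is completed just before the off-diagonal block is
     right-scaled with lvec below.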
*/ 2017 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2018 } 2019 if (ll) { 2020 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2021 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2022 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2023 } 2024 /* scale the diagonal block */ 2025 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2026 2027 if (rr) { 2028 /* Do a scatter end and then right scale the off-diagonal block */ 2029 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2030 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2031 } 2032 PetscFunctionReturn(0); 2033 } 2034 2035 #undef __FUNCT__ 2036 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2037 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2038 { 2039 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2040 PetscErrorCode ierr; 2041 2042 PetscFunctionBegin; 2043 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2044 PetscFunctionReturn(0); 2045 } 2046 2047 #undef __FUNCT__ 2048 #define __FUNCT__ "MatEqual_MPIAIJ" 2049 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2050 { 2051 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2052 Mat a,b,c,d; 2053 PetscBool flg; 2054 PetscErrorCode ierr; 2055 2056 PetscFunctionBegin; 2057 a = matA->A; b = matA->B; 2058 c = matB->A; d = matB->B; 2059 2060 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2061 if (flg) { 2062 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2063 } 2064 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2065 PetscFunctionReturn(0); 2066 } 2067 2068 #undef __FUNCT__ 2069 #define __FUNCT__ "MatCopy_MPIAIJ" 2070 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2071 { 2072 PetscErrorCode ierr; 2073 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2074 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2075 2076 PetscFunctionBegin; 2077 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2078 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2079 /* because of the column compression in the off-processor part of the matrix a->B, 2080 the number of columns in a->B and b->B may be different, hence we cannot call 2081 the MatCopy() directly on the two parts. If need be, we can provide a more 2082 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2083 then copying the submatrices */ 2084 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2085 } else { 2086 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2087 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2088 } 2089 PetscFunctionReturn(0); 2090 } 2091 2092 #undef __FUNCT__ 2093 #define __FUNCT__ "MatSetUp_MPIAIJ" 2094 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2095 { 2096 PetscErrorCode ierr; 2097 2098 PetscFunctionBegin; 2099 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2100 PetscFunctionReturn(0); 2101 } 2102 2103 /* 2104 Computes the number of nonzeros per row needed for preallocation when X and Y 2105 have different nonzero structure. 
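   The count below is a one-pass merge of the two sorted (globally mapped) column lists of
   each row, i.e. the size of their union: for example, X columns {1,4,7} and Y columns
   {4,5} give nnz[i] = |{1,4,5,7}| = 4 (column values purely illustrative).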
2106 */ 2107 #undef __FUNCT__ 2108 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private" 2109 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2110 { 2111 PetscInt i,j,k,nzx,nzy; 2112 2113 PetscFunctionBegin; 2114 /* Set the number of nonzeros in the new matrix */ 2115 for (i=0; i<m; i++) { 2116 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2117 nzx = xi[i+1] - xi[i]; 2118 nzy = yi[i+1] - yi[i]; 2119 nnz[i] = 0; 2120 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2121 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2122 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2123 nnz[i]++; 2124 } 2125 for (; k<nzy; k++) nnz[i]++; 2126 } 2127 PetscFunctionReturn(0); 2128 } 2129 2130 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2131 #undef __FUNCT__ 2132 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2133 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2134 { 2135 PetscErrorCode ierr; 2136 PetscInt m = Y->rmap->N; 2137 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2138 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2139 2140 PetscFunctionBegin; 2141 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2142 PetscFunctionReturn(0); 2143 } 2144 2145 #undef __FUNCT__ 2146 #define __FUNCT__ "MatAXPY_MPIAIJ" 2147 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2148 { 2149 PetscErrorCode ierr; 2150 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2151 PetscBLASInt bnz,one=1; 2152 Mat_SeqAIJ *x,*y; 2153 2154 PetscFunctionBegin; 2155 if (str == SAME_NONZERO_PATTERN) { 2156 PetscScalar alpha = a; 2157 x = (Mat_SeqAIJ*)xx->A->data; 2158 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2159 y = (Mat_SeqAIJ*)yy->A->data; 2160 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2161 x = (Mat_SeqAIJ*)xx->B->data; 2162 y = (Mat_SeqAIJ*)yy->B->data; 2163 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2164 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2165 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2166 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2167 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2168 } else { 2169 Mat B; 2170 PetscInt *nnz_d,*nnz_o; 2171 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2172 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2173 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2174 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2175 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2176 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2177 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2178 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2179 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2180 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2181 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2182 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2183 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2184 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2185 } 2186 
PetscFunctionReturn(0); 2187 } 2188 2189 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2190 2191 #undef __FUNCT__ 2192 #define __FUNCT__ "MatConjugate_MPIAIJ" 2193 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2194 { 2195 #if defined(PETSC_USE_COMPLEX) 2196 PetscErrorCode ierr; 2197 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2198 2199 PetscFunctionBegin; 2200 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2201 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2202 #else 2203 PetscFunctionBegin; 2204 #endif 2205 PetscFunctionReturn(0); 2206 } 2207 2208 #undef __FUNCT__ 2209 #define __FUNCT__ "MatRealPart_MPIAIJ" 2210 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2211 { 2212 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2213 PetscErrorCode ierr; 2214 2215 PetscFunctionBegin; 2216 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2217 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2218 PetscFunctionReturn(0); 2219 } 2220 2221 #undef __FUNCT__ 2222 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2223 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2224 { 2225 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2226 PetscErrorCode ierr; 2227 2228 PetscFunctionBegin; 2229 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2230 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2231 PetscFunctionReturn(0); 2232 } 2233 2234 #if defined(PETSC_HAVE_PBGL) 2235 2236 #include <boost/parallel/mpi/bsp_process_group.hpp> 2237 #include <boost/graph/distributed/ilu_default_graph.hpp> 2238 #include <boost/graph/distributed/ilu_0_block.hpp> 2239 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2240 #include <boost/graph/distributed/petsc/interface.hpp> 2241 #include <boost/multi_array.hpp> 2242 #include <boost/parallel/distributed_property_map->hpp> 2243 2244 #undef __FUNCT__ 2245 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2246 /* 2247 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2248 */ 2249 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2250 { 2251 namespace petsc = boost::distributed::petsc; 2252 2253 namespace graph_dist = boost::graph::distributed; 2254 using boost::graph::distributed::ilu_default::process_group_type; 2255 using boost::graph::ilu_permuted; 2256 2257 PetscBool row_identity, col_identity; 2258 PetscContainer c; 2259 PetscInt m, n, M, N; 2260 PetscErrorCode ierr; 2261 2262 PetscFunctionBegin; 2263 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2264 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2265 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2266 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2267 2268 process_group_type pg; 2269 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2270 lgraph_type *lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2271 lgraph_type& level_graph = *lgraph_p; 2272 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2273 2274 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2275 ilu_permuted(level_graph); 2276 2277 /* put together the new matrix */ 2278 ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr); 2279 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2280 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2281 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2282 ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr); 2283 
ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2284 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2285 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2286 2287 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c); 2288 ierr = PetscContainerSetPointer(c, lgraph_p); 2289 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2290 ierr = PetscContainerDestroy(&c); 2291 PetscFunctionReturn(0); 2292 } 2293 2294 #undef __FUNCT__ 2295 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2296 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2297 { 2298 PetscFunctionBegin; 2299 PetscFunctionReturn(0); 2300 } 2301 2302 #undef __FUNCT__ 2303 #define __FUNCT__ "MatSolve_MPIAIJ" 2304 /* 2305 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2306 */ 2307 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2308 { 2309 namespace graph_dist = boost::graph::distributed; 2310 2311 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2312 lgraph_type *lgraph_p; 2313 PetscContainer c; 2314 PetscErrorCode ierr; 2315 2316 PetscFunctionBegin; 2317 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr); 2318 ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr); 2319 ierr = VecCopy(b, x);CHKERRQ(ierr); 2320 2321 PetscScalar *array_x; 2322 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2323 PetscInt sx; 2324 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2325 2326 PetscScalar *array_b; 2327 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2328 PetscInt sb; 2329 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2330 2331 lgraph_type& level_graph = *lgraph_p; 2332 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2333 2334 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2335 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]); 2336 array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]); 2337 2338 typedef boost::iterator_property_map<array_ref_type::iterator, 2339 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2340 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)); 2341 gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2342 2343 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2344 PetscFunctionReturn(0); 2345 } 2346 #endif 2347 2348 #undef __FUNCT__ 2349 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2350 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2351 { 2352 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2353 PetscErrorCode ierr; 2354 PetscInt i,*idxb = 0; 2355 PetscScalar *va,*vb; 2356 Vec vtmp; 2357 2358 PetscFunctionBegin; 2359 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2360 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2361 if (idx) { 2362 for (i=0; i<A->rmap->n; i++) { 2363 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2364 } 2365 } 2366 2367 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2368 if (idx) { 2369 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2370 } 2371 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2372 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2373 2374 for (i=0; i<A->rmap->n; i++) { 2375 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2376 va[i] = vb[i]; 2377 if (idx) idx[i] = a->garray[idxb[i]]; 2378 } 2379 } 2380 2381 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2382 ierr = 
VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2383 ierr = PetscFree(idxb);CHKERRQ(ierr); 2384 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2385 PetscFunctionReturn(0); 2386 } 2387 2388 #undef __FUNCT__ 2389 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2390 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2391 { 2392 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2393 PetscErrorCode ierr; 2394 PetscInt i,*idxb = 0; 2395 PetscScalar *va,*vb; 2396 Vec vtmp; 2397 2398 PetscFunctionBegin; 2399 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2400 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2401 if (idx) { 2402 for (i=0; i<A->cmap->n; i++) { 2403 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2404 } 2405 } 2406 2407 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2408 if (idx) { 2409 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2410 } 2411 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2412 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2413 2414 for (i=0; i<A->rmap->n; i++) { 2415 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2416 va[i] = vb[i]; 2417 if (idx) idx[i] = a->garray[idxb[i]]; 2418 } 2419 } 2420 2421 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2422 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2423 ierr = PetscFree(idxb);CHKERRQ(ierr); 2424 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2425 PetscFunctionReturn(0); 2426 } 2427 2428 #undef __FUNCT__ 2429 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2430 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2431 { 2432 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2433 PetscInt n = A->rmap->n; 2434 PetscInt cstart = A->cmap->rstart; 2435 PetscInt *cmap = mat->garray; 2436 PetscInt *diagIdx, *offdiagIdx; 2437 Vec diagV, offdiagV; 2438 PetscScalar *a, *diagA, *offdiagA; 2439 PetscInt r; 2440 PetscErrorCode ierr; 2441 2442 PetscFunctionBegin; 2443 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2444 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2445 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 2446 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2447 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2448 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2449 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2450 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2451 for (r = 0; r < n; ++r) { 2452 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2453 a[r] = diagA[r]; 2454 idx[r] = cstart + diagIdx[r]; 2455 } else { 2456 a[r] = offdiagA[r]; 2457 idx[r] = cmap[offdiagIdx[r]]; 2458 } 2459 } 2460 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2461 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2462 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2463 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2464 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2465 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2466 PetscFunctionReturn(0); 2467 } 2468 2469 #undef __FUNCT__ 2470 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2471 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2472 { 2473 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2474 PetscInt n = A->rmap->n; 2475 PetscInt cstart = A->cmap->rstart; 2476 PetscInt *cmap = mat->garray; 2477 PetscInt *diagIdx, *offdiagIdx; 2478 Vec diagV, offdiagV; 2479 PetscScalar *a, *diagA, *offdiagA; 2480 PetscInt r; 2481 PetscErrorCode ierr; 2482 2483 PetscFunctionBegin; 2484 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2485 ierr 
= VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2486 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2487 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2488 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2489 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2490 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2491 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2492 for (r = 0; r < n; ++r) { 2493 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2494 a[r] = diagA[r]; 2495 idx[r] = cstart + diagIdx[r]; 2496 } else { 2497 a[r] = offdiagA[r]; 2498 idx[r] = cmap[offdiagIdx[r]]; 2499 } 2500 } 2501 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2502 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2503 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2504 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2505 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2506 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2507 PetscFunctionReturn(0); 2508 } 2509 2510 #undef __FUNCT__ 2511 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 2512 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2513 { 2514 PetscErrorCode ierr; 2515 Mat *dummy; 2516 2517 PetscFunctionBegin; 2518 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2519 *newmat = *dummy; 2520 ierr = PetscFree(dummy);CHKERRQ(ierr); 2521 PetscFunctionReturn(0); 2522 } 2523 2524 #undef __FUNCT__ 2525 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 2526 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2527 { 2528 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2529 PetscErrorCode ierr; 2530 2531 PetscFunctionBegin; 2532 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2533 PetscFunctionReturn(0); 2534 } 2535 2536 #undef __FUNCT__ 2537 #define __FUNCT__ "MatSetRandom_MPIAIJ" 2538 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2539 { 2540 PetscErrorCode ierr; 2541 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2542 2543 PetscFunctionBegin; 2544 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2545 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2546 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2547 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2548 PetscFunctionReturn(0); 2549 } 2550 2551 /* -------------------------------------------------------------------*/ 2552 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2553 MatGetRow_MPIAIJ, 2554 MatRestoreRow_MPIAIJ, 2555 MatMult_MPIAIJ, 2556 /* 4*/ MatMultAdd_MPIAIJ, 2557 MatMultTranspose_MPIAIJ, 2558 MatMultTransposeAdd_MPIAIJ, 2559 #if defined(PETSC_HAVE_PBGL) 2560 MatSolve_MPIAIJ, 2561 #else 2562 0, 2563 #endif 2564 0, 2565 0, 2566 /*10*/ 0, 2567 0, 2568 0, 2569 MatSOR_MPIAIJ, 2570 MatTranspose_MPIAIJ, 2571 /*15*/ MatGetInfo_MPIAIJ, 2572 MatEqual_MPIAIJ, 2573 MatGetDiagonal_MPIAIJ, 2574 MatDiagonalScale_MPIAIJ, 2575 MatNorm_MPIAIJ, 2576 /*20*/ MatAssemblyBegin_MPIAIJ, 2577 MatAssemblyEnd_MPIAIJ, 2578 MatSetOption_MPIAIJ, 2579 MatZeroEntries_MPIAIJ, 2580 /*24*/ MatZeroRows_MPIAIJ, 2581 0, 2582 #if defined(PETSC_HAVE_PBGL) 2583 0, 2584 #else 2585 0, 2586 #endif 2587 0, 2588 0, 2589 /*29*/ MatSetUp_MPIAIJ, 2590 #if defined(PETSC_HAVE_PBGL) 2591 0, 2592 #else 2593 0, 2594 #endif 2595 0, 2596 0, 2597 0, 2598 /*34*/ MatDuplicate_MPIAIJ, 2599 0, 2600 0, 2601 0, 2602 0, 2603 /*39*/ MatAXPY_MPIAIJ, 2604 MatGetSubMatrices_MPIAIJ, 2605 MatIncreaseOverlap_MPIAIJ, 2606 MatGetValues_MPIAIJ, 
2607 MatCopy_MPIAIJ, 2608 /*44*/ MatGetRowMax_MPIAIJ, 2609 MatScale_MPIAIJ, 2610 0, 2611 MatDiagonalSet_MPIAIJ, 2612 MatZeroRowsColumns_MPIAIJ, 2613 /*49*/ MatSetRandom_MPIAIJ, 2614 0, 2615 0, 2616 0, 2617 0, 2618 /*54*/ MatFDColoringCreate_MPIXAIJ, 2619 0, 2620 MatSetUnfactored_MPIAIJ, 2621 MatPermute_MPIAIJ, 2622 0, 2623 /*59*/ MatGetSubMatrix_MPIAIJ, 2624 MatDestroy_MPIAIJ, 2625 MatView_MPIAIJ, 2626 0, 2627 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2628 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2629 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2630 0, 2631 0, 2632 0, 2633 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2634 MatGetRowMinAbs_MPIAIJ, 2635 0, 2636 MatSetColoring_MPIAIJ, 2637 0, 2638 MatSetValuesAdifor_MPIAIJ, 2639 /*75*/ MatFDColoringApply_AIJ, 2640 0, 2641 0, 2642 0, 2643 MatFindZeroDiagonals_MPIAIJ, 2644 /*80*/ 0, 2645 0, 2646 0, 2647 /*83*/ MatLoad_MPIAIJ, 2648 0, 2649 0, 2650 0, 2651 0, 2652 0, 2653 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2654 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2655 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2656 MatPtAP_MPIAIJ_MPIAIJ, 2657 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2658 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2659 0, 2660 0, 2661 0, 2662 0, 2663 /*99*/ 0, 2664 0, 2665 0, 2666 MatConjugate_MPIAIJ, 2667 0, 2668 /*104*/MatSetValuesRow_MPIAIJ, 2669 MatRealPart_MPIAIJ, 2670 MatImaginaryPart_MPIAIJ, 2671 0, 2672 0, 2673 /*109*/0, 2674 0, 2675 MatGetRowMin_MPIAIJ, 2676 0, 2677 0, 2678 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2679 0, 2680 0, 2681 0, 2682 0, 2683 /*119*/0, 2684 0, 2685 0, 2686 0, 2687 MatGetMultiProcBlock_MPIAIJ, 2688 /*124*/MatFindNonzeroRows_MPIAIJ, 2689 MatGetColumnNorms_MPIAIJ, 2690 MatInvertBlockDiagonal_MPIAIJ, 2691 0, 2692 MatGetSubMatricesParallel_MPIAIJ, 2693 /*129*/0, 2694 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2695 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2696 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2697 0, 2698 /*134*/0, 2699 0, 2700 0, 2701 0, 2702 0, 2703 /*139*/0, 2704 0, 2705 0, 2706 MatFDColoringSetUp_MPIXAIJ, 2707 0, 2708 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2709 }; 2710 2711 /* ----------------------------------------------------------------------------------------*/ 2712 2713 #undef __FUNCT__ 2714 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2715 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2716 { 2717 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2718 PetscErrorCode ierr; 2719 2720 PetscFunctionBegin; 2721 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2722 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2723 PetscFunctionReturn(0); 2724 } 2725 2726 #undef __FUNCT__ 2727 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2728 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2729 { 2730 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2731 PetscErrorCode ierr; 2732 2733 PetscFunctionBegin; 2734 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2735 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2736 PetscFunctionReturn(0); 2737 } 2738 2739 #undef __FUNCT__ 2740 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2741 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2742 { 2743 Mat_MPIAIJ *b; 2744 PetscErrorCode ierr; 2745 2746 PetscFunctionBegin; 2747 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2748 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2749 b = (Mat_MPIAIJ*)B->data; 2750 2751 if (!B->preallocated) { 2752 /* Explicitly create 2 MATSEQAIJ matrices. 
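       b->A is the square "diagonal" block acting on the locally owned columns, while b->B holds
       the coupling to off-process columns (its columns are compacted against garray at assembly
       time); both are sequential matrices living on PETSC_COMM_SELF.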
*/ 2753 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2754 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2755 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2756 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2757 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2758 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2759 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2760 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2761 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2762 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2763 } 2764 2765 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2766 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2767 B->preallocated = PETSC_TRUE; 2768 PetscFunctionReturn(0); 2769 } 2770 2771 #undef __FUNCT__ 2772 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2773 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2774 { 2775 Mat mat; 2776 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2777 PetscErrorCode ierr; 2778 2779 PetscFunctionBegin; 2780 *newmat = 0; 2781 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2782 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2783 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2784 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2785 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2786 a = (Mat_MPIAIJ*)mat->data; 2787 2788 mat->factortype = matin->factortype; 2789 mat->assembled = PETSC_TRUE; 2790 mat->insertmode = NOT_SET_VALUES; 2791 mat->preallocated = PETSC_TRUE; 2792 2793 a->size = oldmat->size; 2794 a->rank = oldmat->rank; 2795 a->donotstash = oldmat->donotstash; 2796 a->roworiented = oldmat->roworiented; 2797 a->rowindices = 0; 2798 a->rowvalues = 0; 2799 a->getrowactive = PETSC_FALSE; 2800 2801 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2802 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2803 2804 if (oldmat->colmap) { 2805 #if defined(PETSC_USE_CTABLE) 2806 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2807 #else 2808 ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr); 2809 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2810 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2811 #endif 2812 } else a->colmap = 0; 2813 if (oldmat->garray) { 2814 PetscInt len; 2815 len = oldmat->B->cmap->n; 2816 ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr); 2817 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2818 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2819 } else a->garray = 0; 2820 2821 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2822 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2823 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2824 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2825 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2826 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2827 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2828 ierr = 
PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2829 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2830 *newmat = mat; 2831 PetscFunctionReturn(0); 2832 } 2833 2834 2835 2836 #undef __FUNCT__ 2837 #define __FUNCT__ "MatLoad_MPIAIJ" 2838 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2839 { 2840 PetscScalar *vals,*svals; 2841 MPI_Comm comm; 2842 PetscErrorCode ierr; 2843 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2844 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 2845 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2846 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2847 PetscInt cend,cstart,n,*rowners,sizesset=1; 2848 int fd; 2849 PetscInt bs = 1; 2850 2851 PetscFunctionBegin; 2852 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2853 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2854 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2855 if (!rank) { 2856 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2857 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2858 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2859 } 2860 2861 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 2862 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2863 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2864 2865 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 2866 2867 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2868 M = header[1]; N = header[2]; 2869 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 2870 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 2871 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 2872 2873 /* If global sizes are set, check if they are consistent with that given in the file */ 2874 if (sizesset) { 2875 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 2876 } 2877 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 2878 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 2879 2880 /* determine ownership of all (block) rows */ 2881 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 2882 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 2883 else m = newMat->rmap->n; /* Set by user */ 2884 2885 ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr); 2886 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2887 2888 /* First process needs enough room for process with most rows */ 2889 if (!rank) { 2890 mmax = rowners[1]; 2891 for (i=2; i<=size; i++) { 2892 mmax = PetscMax(mmax, rowners[i]); 2893 } 2894 } else mmax = -1; /* unused, but compilers complain */ 2895 2896 rowners[0] = 0; 2897 for (i=2; i<=size; i++) { 2898 rowners[i] += rowners[i-1]; 2899 } 2900 rstart = rowners[rank]; 2901 rend = rowners[rank+1]; 2902 2903 /* distribute row lengths to all processors */ 2904 ierr = 
PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 2905 if (!rank) { 2906 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2907 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 2908 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 2909 for (j=0; j<m; j++) { 2910 procsnz[0] += ourlens[j]; 2911 } 2912 for (i=1; i<size; i++) { 2913 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2914 /* calculate the number of nonzeros on each processor */ 2915 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2916 procsnz[i] += rowlengths[j]; 2917 } 2918 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2919 } 2920 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2921 } else { 2922 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2923 } 2924 2925 if (!rank) { 2926 /* determine max buffer needed and allocate it */ 2927 maxnz = 0; 2928 for (i=0; i<size; i++) { 2929 maxnz = PetscMax(maxnz,procsnz[i]); 2930 } 2931 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2932 2933 /* read in my part of the matrix column indices */ 2934 nz = procsnz[0]; 2935 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2936 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2937 2938 /* read in every one elses and ship off */ 2939 for (i=1; i<size; i++) { 2940 nz = procsnz[i]; 2941 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2942 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2943 } 2944 ierr = PetscFree(cols);CHKERRQ(ierr); 2945 } else { 2946 /* determine buffer space needed for message */ 2947 nz = 0; 2948 for (i=0; i<m; i++) { 2949 nz += ourlens[i]; 2950 } 2951 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2952 2953 /* receive message of column indices*/ 2954 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2955 } 2956 2957 /* determine column ownership if matrix is not square */ 2958 if (N != M) { 2959 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 2960 else n = newMat->cmap->n; 2961 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2962 cstart = cend - n; 2963 } else { 2964 cstart = rstart; 2965 cend = rend; 2966 n = cend - cstart; 2967 } 2968 2969 /* loop over local rows, determining number of off diagonal entries */ 2970 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2971 jj = 0; 2972 for (i=0; i<m; i++) { 2973 for (j=0; j<ourlens[i]; j++) { 2974 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2975 jj++; 2976 } 2977 } 2978 2979 for (i=0; i<m; i++) { 2980 ourlens[i] -= offlens[i]; 2981 } 2982 if (!sizesset) { 2983 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 2984 } 2985 2986 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 2987 2988 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 2989 2990 for (i=0; i<m; i++) { 2991 ourlens[i] += offlens[i]; 2992 } 2993 2994 if (!rank) { 2995 ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr); 2996 2997 /* read in my part of the matrix numerical values */ 2998 nz = procsnz[0]; 2999 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3000 3001 /* insert into matrix */ 3002 jj = rstart; 3003 smycols = mycols; 3004 svals = vals; 3005 for (i=0; i<m; i++) { 3006 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3007 smycols += ourlens[i]; 3008 svals += ourlens[i]; 3009 jj++; 3010 } 3011 3012 /* read in other processors and ship out */ 3013 for (i=1; i<size; i++) { 3014 nz = 
procsnz[i]; 3015 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3016 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3017 } 3018 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3019 } else { 3020 /* receive numeric values */ 3021 ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr); 3022 3023 /* receive message of values*/ 3024 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3025 3026 /* insert into matrix */ 3027 jj = rstart; 3028 smycols = mycols; 3029 svals = vals; 3030 for (i=0; i<m; i++) { 3031 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3032 smycols += ourlens[i]; 3033 svals += ourlens[i]; 3034 jj++; 3035 } 3036 } 3037 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3038 ierr = PetscFree(vals);CHKERRQ(ierr); 3039 ierr = PetscFree(mycols);CHKERRQ(ierr); 3040 ierr = PetscFree(rowners);CHKERRQ(ierr); 3041 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3042 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3043 PetscFunctionReturn(0); 3044 } 3045 3046 #undef __FUNCT__ 3047 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3048 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3049 { 3050 PetscErrorCode ierr; 3051 IS iscol_local; 3052 PetscInt csize; 3053 3054 PetscFunctionBegin; 3055 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3056 if (call == MAT_REUSE_MATRIX) { 3057 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3058 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3059 } else { 3060 PetscInt cbs; 3061 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3062 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3063 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3064 } 3065 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3066 if (call == MAT_INITIAL_MATRIX) { 3067 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3068 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3069 } 3070 PetscFunctionReturn(0); 3071 } 3072 3073 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3074 #undef __FUNCT__ 3075 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3076 /* 3077 Not great since it makes two copies of the submatrix, first an SeqAIJ 3078 in local and then by concatenating the local matrices the end result. 3079 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3080 3081 Note: This requires a sequential iscol with all indices. 
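
   For illustration, a caller is expected to gather the column index set first, as
   MatGetSubMatrix_MPIAIJ() above already does (a sketch only; error checking omitted):

      ISAllGather(iscol,&iscol_local);          /* every process obtains all column indices */
      MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);
      ISDestroy(&iscol_local);                  /* for MAT_INITIAL_MATRIX it is composed with *newmat first */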
3082 */ 3083 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3084 { 3085 PetscErrorCode ierr; 3086 PetscMPIInt rank,size; 3087 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3088 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3089 PetscBool allcolumns, colflag; 3090 Mat M,Mreuse; 3091 MatScalar *vwork,*aa; 3092 MPI_Comm comm; 3093 Mat_SeqAIJ *aij; 3094 3095 PetscFunctionBegin; 3096 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3097 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3098 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3099 3100 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3101 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3102 if (colflag && ncol == mat->cmap->N) { 3103 allcolumns = PETSC_TRUE; 3104 } else { 3105 allcolumns = PETSC_FALSE; 3106 } 3107 if (call == MAT_REUSE_MATRIX) { 3108 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3109 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3110 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3111 } else { 3112 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3113 } 3114 3115 /* 3116 m - number of local rows 3117 n - number of columns (same on all processors) 3118 rstart - first row in new global matrix generated 3119 */ 3120 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3121 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3122 if (call == MAT_INITIAL_MATRIX) { 3123 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3124 ii = aij->i; 3125 jj = aij->j; 3126 3127 /* 3128 Determine the number of non-zeros in the diagonal and off-diagonal 3129 portions of the matrix in order to do correct preallocation 3130 */ 3131 3132 /* first get start and end of "diagonal" columns */ 3133 if (csize == PETSC_DECIDE) { 3134 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3135 if (mglobal == n) { /* square matrix */ 3136 nlocal = m; 3137 } else { 3138 nlocal = n/size + ((n % size) > rank); 3139 } 3140 } else { 3141 nlocal = csize; 3142 } 3143 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3144 rstart = rend - nlocal; 3145 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3146 3147 /* next, compute all the lengths */ 3148 ierr = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr); 3149 olens = dlens + m; 3150 for (i=0; i<m; i++) { 3151 jend = ii[i+1] - ii[i]; 3152 olen = 0; 3153 dlen = 0; 3154 for (j=0; j<jend; j++) { 3155 if (*jj < rstart || *jj >= rend) olen++; 3156 else dlen++; 3157 jj++; 3158 } 3159 olens[i] = olen; 3160 dlens[i] = dlen; 3161 } 3162 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3163 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3164 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3165 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3166 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3167 ierr = PetscFree(dlens);CHKERRQ(ierr); 3168 } else { 3169 PetscInt ml,nl; 3170 3171 M = *newmat; 3172 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3173 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3174 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3175 /* 3176 The next 
two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj; jj += nz;
    vwork = aa; aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       m,cstart,cend,j,nnz,i,d;
  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
  const PetscInt *JJ;
  PetscScalar    *values;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);

  ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);

#if defined(PETSC_USE_DEBUG)
  for (i=0; i<m; i++) {
    nnz = Ii[i+1] - Ii[i];
    JJ  = J + Ii[i];
    if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
    if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
    if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
  }
#endif

  for (i=0; i<m; i++) {
    nnz     = Ii[i+1] - Ii[i];
    JJ      = J + Ii[i];
    nnz_max = PetscMax(nnz_max,nnz);
    d       = 0;
    for (j=0; j<nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  if (v) values = (PetscScalar*)v;
  else {
    ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr);
  }

  for (i=0; i<m; i++) {
    ii   = i + rstart;
    nnz  = Ii[i+1] - Ii[i];
    ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (!v) {
    ierr = PetscFree(values);CHKERRQ(ierr);
  }
  ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
/*@
   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero)
-  v - optional values in the matrix

   Level: developer

   Notes:
       The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of v[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and the i indices refer to positions in the local j array.

       The format used for the sparse matrix input is equivalent to a
    row-major ordering, i.e. for the following matrix, the expected input data is
    as shown:

        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0]: rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1 = 2+1]
        j =  {0,0,2}  [size = nz = 3]
        v =  {1,2,3}  [size = nz = 3]

     Process1 [P1]: rows_owned=[2]
        i =  {0,3}    [size = nrow+1 = 1+1]
        j =  {0,1,2}  [size = nz = 3]
        v =  {4,5,6}  [size = nz = 3]

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
          MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation"
/*@C
   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e. 'm'.
           For matrices that will be factored, you must leave room for (and set)
           the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e. 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)) is fully compatible with standard Fortran 77
   storage.  The stored row and column indices begin with zero.
   See Users-Manual: ch_mat for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square.  The remaining portion of the local
   submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was,
   for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors.  Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows.  This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively.  The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices; e.g. proc1 will store [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence the preallocation is perfect.

   Level: intermediate

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatGetInfo(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithArrays"
/*@
   MatCreateMPIAIJWithArrays - Creates an MPIAIJ matrix using arrays that contain
   the local rows in standard CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and the i indices refer to positions in the local j array.
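
       As an illustration only (a sketch; here m and n are the local sizes and i, j, a are the
     per-process CSR arrays laid out as in the example below), each process would call
.vb
     Mat A;
     ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);CHKERRQ(ierr);
.ve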
       The format used for the sparse matrix input is equivalent to a
    row-major ordering, i.e. for the following matrix, the expected input data is
    as shown:

        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0]: rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1 = 2+1]
        j =  {0,0,2}  [size = nz = 3]
        v =  {1,2,3}  [size = nz = 3]

     Process1 [P1]: rows_owned=[2]
        i =  {0,3}    [size = nrow+1 = 1+1]
        j =  {0,1,2}  [size = nz = 3]
        v =  {4,5,6}  [size = nz = 3]

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateAIJ"
/*@C
   MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e. 'm'.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or NULL, if o_nz is used to specify the nonzero
           structure.
           The size of this array is equal to the number
           of local rows, i.e. 'm'.

   Output Parameter:
.  A - the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored.

   The m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2, etc., where
   m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
   values corresponding to an [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to the 0th partition, the next n1 columns belonging to the next
   partition, etc., where n0,n1,n2,... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor, i.e. the diagonal matrix on
   process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
   etc.  The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion.  The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism:
   MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode  - Do not use inodes
.  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
-  -mat_aij_oneindex - Internally use indexing starting at 1
        rather than 0.  Note that when calling MatSetValues(),
        the user still MUST index entries starting at 0!


   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors.  Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows.
   This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively.  The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices; e.g. proc1 will store [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence the preallocation is perfect.
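
   As an illustration only, a call matching the layout above might look like the following
   sketch on proc0 (the d_nnz and o_nnz values are the proc0 arrays listed above; the other
   processes pass their own local sizes and arrays):
.vb
     Mat      A;
     PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
     ierr = MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve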
3710 3711 Level: intermediate 3712 3713 .keywords: matrix, aij, compressed row, sparse, parallel 3714 3715 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3716 MPIAIJ, MatCreateMPIAIJWithArrays() 3717 @*/ 3718 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3719 { 3720 PetscErrorCode ierr; 3721 PetscMPIInt size; 3722 3723 PetscFunctionBegin; 3724 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3725 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3726 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3727 if (size > 1) { 3728 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3729 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3730 } else { 3731 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3732 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3733 } 3734 PetscFunctionReturn(0); 3735 } 3736 3737 #undef __FUNCT__ 3738 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3739 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 3740 { 3741 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3742 3743 PetscFunctionBegin; 3744 if (Ad) *Ad = a->A; 3745 if (Ao) *Ao = a->B; 3746 if (colmap) *colmap = a->garray; 3747 PetscFunctionReturn(0); 3748 } 3749 3750 #undef __FUNCT__ 3751 #define __FUNCT__ "MatSetColoring_MPIAIJ" 3752 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 3753 { 3754 PetscErrorCode ierr; 3755 PetscInt i; 3756 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3757 3758 PetscFunctionBegin; 3759 if (coloring->ctype == IS_COLORING_GLOBAL) { 3760 ISColoringValue *allcolors,*colors; 3761 ISColoring ocoloring; 3762 3763 /* set coloring for diagonal portion */ 3764 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 3765 3766 /* set coloring for off-diagonal portion */ 3767 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 3768 ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr); 3769 for (i=0; i<a->B->cmap->n; i++) { 3770 colors[i] = allcolors[a->garray[i]]; 3771 } 3772 ierr = PetscFree(allcolors);CHKERRQ(ierr); 3773 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3774 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3775 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 3776 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 3777 ISColoringValue *colors; 3778 PetscInt *larray; 3779 ISColoring ocoloring; 3780 3781 /* set coloring for diagonal portion */ 3782 ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr); 3783 for (i=0; i<a->A->cmap->n; i++) { 3784 larray[i] = i + A->cmap->rstart; 3785 } 3786 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 3787 ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr); 3788 for (i=0; i<a->A->cmap->n; i++) { 3789 colors[i] = coloring->colors[larray[i]]; 3790 } 3791 ierr = PetscFree(larray);CHKERRQ(ierr); 3792 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3793 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 3794 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 3795 3796 /* set coloring for off-diagonal portion */ 3797 ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr); 3798 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 3799 ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr); 3800 for (i=0; i<a->B->cmap->n; i++) { 3801 colors[i] = coloring->colors[larray[i]]; 3802 } 3803 ierr = PetscFree(larray);CHKERRQ(ierr); 3804 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 3805 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 3806 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 3807 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 3808 PetscFunctionReturn(0); 3809 } 3810 3811 #undef __FUNCT__ 3812 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 3813 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 3814 { 3815 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3816 PetscErrorCode ierr; 3817 3818 PetscFunctionBegin; 3819 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 3820 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 3821 PetscFunctionReturn(0); 3822 } 3823 3824 #undef __FUNCT__ 3825 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIAIJ" 3826 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3827 { 3828 PetscErrorCode ierr; 3829 PetscInt m,N,i,rstart,nnz,Ii; 3830 PetscInt *indx; 3831 PetscScalar *values; 3832 3833 PetscFunctionBegin; 3834 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3835 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 3836 PetscInt *dnz,*onz,sum,bs,cbs; 3837 3838 if (n == PETSC_DECIDE) { 3839 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3840 } 3841 /* Check sum(n) = N */ 3842 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3843 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 3844 3845 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3846 rstart -= m; 3847 3848 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3849 for (i=0; i<m; i++) { 3850 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 3851 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3852 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 3853 } 3854 3855 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3856 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3857 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3858 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 3859 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3860 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3861 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3862 } 3863 3864 /* numeric phase */ 3865 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 3866 for (i=0; i<m; i++) { 3867 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3868 Ii = i + rstart; 3869 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3870 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3871 } 3872 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3873 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3874 PetscFunctionReturn(0); 3875 } 3876 3877 #undef __FUNCT__ 3878 #define __FUNCT__ "MatFileSplit" 3879 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3880 { 3881 PetscErrorCode ierr; 3882 
PetscMPIInt rank; 3883 PetscInt m,N,i,rstart,nnz; 3884 size_t len; 3885 const PetscInt *indx; 3886 PetscViewer out; 3887 char *name; 3888 Mat B; 3889 const PetscScalar *values; 3890 3891 PetscFunctionBegin; 3892 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3893 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3894 /* Should this be the type of the diagonal block of A? */ 3895 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3896 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3897 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 3898 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3899 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 3900 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3901 for (i=0; i<m; i++) { 3902 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3903 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3904 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3905 } 3906 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3907 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3908 3909 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 3910 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3911 ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr); 3912 sprintf(name,"%s.%d",outfile,rank); 3913 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3914 ierr = PetscFree(name);CHKERRQ(ierr); 3915 ierr = MatView(B,out);CHKERRQ(ierr); 3916 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 3917 ierr = MatDestroy(&B);CHKERRQ(ierr); 3918 PetscFunctionReturn(0); 3919 } 3920 3921 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 3922 #undef __FUNCT__ 3923 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3924 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3925 { 3926 PetscErrorCode ierr; 3927 Mat_Merge_SeqsToMPI *merge; 3928 PetscContainer container; 3929 3930 PetscFunctionBegin; 3931 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 3932 if (container) { 3933 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 3934 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3935 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3936 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3937 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3938 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3939 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 3940 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3941 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 3942 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3943 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 3944 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 3945 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 3946 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 3947 ierr = PetscFree(merge);CHKERRQ(ierr); 3948 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 3949 } 3950 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 3951 PetscFunctionReturn(0); 3952 } 3953 3954 #include <../src/mat/utils/freespace.h> 3955 #include <petscbt.h> 3956 3957 #undef __FUNCT__ 3958 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 3959 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 3960 { 3961 PetscErrorCode ierr; 3962 MPI_Comm comm; 3963 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 3964 PetscMPIInt size,rank,taga,*len_s; 3965 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 3966 PetscInt proc,m; 3967 PetscInt **buf_ri,**buf_rj; 3968 PetscInt 
k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 3969 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 3970 MPI_Request *s_waits,*r_waits; 3971 MPI_Status *status; 3972 MatScalar *aa=a->a; 3973 MatScalar **abuf_r,*ba_i; 3974 Mat_Merge_SeqsToMPI *merge; 3975 PetscContainer container; 3976 3977 PetscFunctionBegin; 3978 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 3979 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3980 3981 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3982 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3983 3984 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 3985 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 3986 3987 bi = merge->bi; 3988 bj = merge->bj; 3989 buf_ri = merge->buf_ri; 3990 buf_rj = merge->buf_rj; 3991 3992 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 3993 owners = merge->rowmap->range; 3994 len_s = merge->len_s; 3995 3996 /* send and recv matrix values */ 3997 /*-----------------------------*/ 3998 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 3999 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4000 4001 ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr); 4002 for (proc=0,k=0; proc<size; proc++) { 4003 if (!len_s[proc]) continue; 4004 i = owners[proc]; 4005 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4006 k++; 4007 } 4008 4009 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4010 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4011 ierr = PetscFree(status);CHKERRQ(ierr); 4012 4013 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4014 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4015 4016 /* insert mat values of mpimat */ 4017 /*----------------------------*/ 4018 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4019 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4020 4021 for (k=0; k<merge->nrecv; k++) { 4022 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4023 nrows = *(buf_ri_k[k]); 4024 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4025 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4026 } 4027 4028 /* set values of ba */ 4029 m = merge->rowmap->n; 4030 for (i=0; i<m; i++) { 4031 arow = owners[rank] + i; 4032 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4033 bnzi = bi[i+1] - bi[i]; 4034 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4035 4036 /* add local non-zero vals of this proc's seqmat into ba */ 4037 anzi = ai[arow+1] - ai[arow]; 4038 aj = a->j + ai[arow]; 4039 aa = a->a + ai[arow]; 4040 nextaj = 0; 4041 for (j=0; nextaj<anzi; j++) { 4042 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4043 ba_i[j] += aa[nextaj++]; 4044 } 4045 } 4046 4047 /* add received vals into ba */ 4048 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4049 /* i-th row */ 4050 if (i == *nextrow[k]) { 4051 anzi = *(nextai[k]+1) - *nextai[k]; 4052 aj = buf_rj[k] + *(nextai[k]); 4053 aa = abuf_r[k] + *(nextai[k]); 4054 nextaj = 0; 4055 for (j=0; nextaj<anzi; j++) { 4056 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4057 ba_i[j] += aa[nextaj++]; 4058 } 4059 } 4060 nextrow[k]++; nextai[k]++; 4061 } 4062 } 4063 ierr = 
MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4064 } 4065 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4066 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4067 4068 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4069 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4070 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4071 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4072 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4073 PetscFunctionReturn(0); 4074 } 4075 4076 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4077 4078 #undef __FUNCT__ 4079 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4080 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4081 { 4082 PetscErrorCode ierr; 4083 Mat B_mpi; 4084 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4085 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4086 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4087 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4088 PetscInt len,proc,*dnz,*onz,bs,cbs; 4089 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4090 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4091 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4092 MPI_Status *status; 4093 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4094 PetscBT lnkbt; 4095 Mat_Merge_SeqsToMPI *merge; 4096 PetscContainer container; 4097 4098 PetscFunctionBegin; 4099 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4100 4101 /* make sure it is a PETSc comm */ 4102 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4103 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4104 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4105 4106 ierr = PetscNew(&merge);CHKERRQ(ierr); 4107 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4108 4109 /* determine row ownership */ 4110 /*---------------------------------------------------------*/ 4111 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4112 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4113 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4114 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4115 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4116 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4117 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4118 4119 m = merge->rowmap->n; 4120 owners = merge->rowmap->range; 4121 4122 /* determine the number of messages to send, their lengths */ 4123 /*---------------------------------------------------------*/ 4124 len_s = merge->len_s; 4125 4126 len = 0; /* length of buf_si[] */ 4127 merge->nsend = 0; 4128 for (proc=0; proc<size; proc++) { 4129 len_si[proc] = 0; 4130 if (proc == rank) { 4131 len_s[proc] = 0; 4132 } else { 4133 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4134 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4135 } 4136 if (len_s[proc]) { 4137 merge->nsend++; 4138 nrows = 0; 4139 for (i=owners[proc]; i<owners[proc+1]; i++) { 4140 if (ai[i+1] > ai[i]) nrows++; 4141 } 4142 len_si[proc] = 2*(nrows+1); 4143 len += len_si[proc]; 4144 } 4145 } 4146 4147 /* determine the number and length of messages to receive for ij-structure */ 4148 /*-------------------------------------------------------------------------*/ 4149 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4150 ierr = 
PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4151 4152 /* post the Irecv of j-structure */ 4153 /*-------------------------------*/ 4154 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4155 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4156 4157 /* post the Isend of j-structure */ 4158 /*--------------------------------*/ 4159 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4160 4161 for (proc=0, k=0; proc<size; proc++) { 4162 if (!len_s[proc]) continue; 4163 i = owners[proc]; 4164 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4165 k++; 4166 } 4167 4168 /* receives and sends of j-structure are complete */ 4169 /*------------------------------------------------*/ 4170 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4171 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4172 4173 /* send and recv i-structure */ 4174 /*---------------------------*/ 4175 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4176 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4177 4178 ierr = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr); 4179 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4180 for (proc=0,k=0; proc<size; proc++) { 4181 if (!len_s[proc]) continue; 4182 /* form outgoing message for i-structure: 4183 buf_si[0]: nrows to be sent 4184 [1:nrows]: row index (global) 4185 [nrows+1:2*nrows+1]: i-structure index 4186 */ 4187 /*-------------------------------------------*/ 4188 nrows = len_si[proc]/2 - 1; 4189 buf_si_i = buf_si + nrows+1; 4190 buf_si[0] = nrows; 4191 buf_si_i[0] = 0; 4192 nrows = 0; 4193 for (i=owners[proc]; i<owners[proc+1]; i++) { 4194 anzi = ai[i+1] - ai[i]; 4195 if (anzi) { 4196 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4197 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4198 nrows++; 4199 } 4200 } 4201 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4202 k++; 4203 buf_si += len_si[proc]; 4204 } 4205 4206 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4207 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4208 4209 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4210 for (i=0; i<merge->nrecv; i++) { 4211 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4212 } 4213 4214 ierr = PetscFree(len_si);CHKERRQ(ierr); 4215 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4216 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4217 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4218 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4219 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4220 ierr = PetscFree(status);CHKERRQ(ierr); 4221 4222 /* compute a local seq matrix in each processor */ 4223 /*----------------------------------------------*/ 4224 /* allocate bi array and free space for accumulating nonzero column info */ 4225 ierr = PetscMalloc1((m+1),&bi);CHKERRQ(ierr); 4226 bi[0] = 0; 4227 4228 /* create and initialize a linked list */ 4229 nlnk = N+1; 4230 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4231 4232 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4233 len = ai[owners[rank+1]] - 
ai[owners[rank]];
  ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);

  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len  = 0;
  for (i=0; i<m; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow = owners[rank] + i;
    anzi = ai[arow+1] - ai[arow];
    aj   = a->j + ai[arow];
    ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextai[k]+1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi; /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);

  ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi           = bi;
  merge->bj           = bj;
  merge->buf_ri       = buf_ri;
  merge->buf_rj       = buf_rj;
  merge->coi          = NULL;
  merge->coj          = NULL;
  merge->owners_co    = NULL;

  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);

  /* attach the supporting struct to B_mpi for
reuse */
  ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
  *mpimat = B_mpi;

  ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
/*@C
   MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding the sequential
   matrices from each processor

   Collective on MPI_Comm

   Input Parameters:
+  comm - the communicator the parallel matrix will live on
.  seqmat - the input sequential matrix on each process
.  m - number of local rows (or PETSC_DECIDE)
.  n - number of local columns (or PETSC_DECIDE)
-  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.  mpimat - the parallel matrix generated

   Level: advanced

   Notes:
     The dimensions of the sequential matrix in each processor MUST be the same.
     The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
     destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
@*/
PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    if (scall == MAT_INITIAL_MATRIX) {
      ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
    } else {
      ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    }
    ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }
  ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetLocalMat"
/*@
   MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows
   and putting them into a sequential matrix with mlocal rows and n columns, where mlocal is the row count
   obtained with MatGetLocalSize() and n is the global column count obtained with MatGetSize().

   Not Collective

   Input Parameters:
+  A - the matrix
-  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetLocalMat"
/*@
     MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
          mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
          with MatGetSize()

    Not Collective

   Input Parameters:
+    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()

@*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *mat,*a,*b;
  PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
  MatScalar      *aa,*ba,*cam;
  PetscScalar    *ca;
  PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
  PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
  PetscBool      match;
  MPI_Comm       comm;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);

  ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  a = (Mat_SeqAIJ*)(mpimat->A)->data;
  b = (Mat_SeqAIJ*)(mpimat->B)->data;
  ai = a->i; aj = a->j; bi = b->i; bj = b->j;
  aa = a->a; ba = b->a;
  if (scall == MAT_INITIAL_MATRIX) {
    if (size == 1) {
      ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }

    ierr  = PetscMalloc1((1+am),&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++) {
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr);
    ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr);
    k    = 0;
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    mat = (Mat_SeqAIJ*)(*A_loc)->data;
    ci = mat->i; cj = mat->j; cam = mat->a;
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
/*@C
     MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns

    Not Collective

   Input Parameters:
+    A - the matrix
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    row, col - index sets of rows and columns to extract (or NULL)

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()

@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS             isrowa,iscola;
  Mat            *aloc;
  PetscBool      match;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
  ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  if (!row) {
    start = A->rmap->rstart; end = A->rmap->rend;
    ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  if (!col) {
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  *A_loc = aloc[0];
  ierr   = PetscFree(aloc);CHKERRQ(ierr);
  if (!row) {
    ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
  }
  if (!col) {
    ierr = ISDestroy(&iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
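
/*
   A small usage sketch for MatMPIAIJGetLocalMat() and MatMPIAIJGetLocalMatCondensed()
   (illustrative only; A is assumed to be an assembled MPIAIJ matrix):

      Mat A_loc,A_cond;
      ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
      (A_loc holds the local rows of A with all N global columns)
      ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
      ierr = MatDestroy(&A_loc);CHKERRQ(ierr);

      ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_cond);CHKERRQ(ierr);
      (A_cond keeps only the columns that have a nonzero in some local row)
      ierr = MatDestroy(&A_cond);CHKERRQ(ierr);
*/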
#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAcols"
/*@C
    MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    rowb, colb - index sets of rows and columns of B to extract (or NULL)

   Output Parameter:
+    rowb, colb - index sets of rows and columns of B to extract
-    B_seq - the sequential matrix generated

    Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq=NULL;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX) {
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb  = *rowb; iscolb = *colb;
    ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr   = PetscFree(bseq);CHKERRQ(ierr);
  if (!rowb) {
    ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
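
/*
   A sketch of how MatGetBrowsOfAcols() is typically driven (illustrative only; A and B
   are assumed to be compatible assembled MPIAIJ matrices):

      IS  rowb = NULL,colb = NULL;
      Mat B_seq;
      ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
      (B_seq holds the rows of B matching the nonzero columns of the local part of A;
       rowb and colb are kept so the next call can reuse the same layout)
      ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
      ierr = ISDestroy(&rowb);CHKERRQ(ierr);
      ierr = ISDestroy(&colb);CHKERRQ(ierr);
      ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
*/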
#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
/*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
    of the OFF-DIAGONAL portion of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
+    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
.    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
.    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
-    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

    Level: developer

*/
PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
{
  VecScatter_MPI_General *gen_to,*gen_from;
  PetscErrorCode         ierr;
  Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ             *b_oth;
  VecScatter             ctx = a->Mvctx;
  MPI_Comm               comm;
  PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
  PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
  PetscScalar            *rvalues,*svalues;
  MatScalar              *b_otha,*bufa,*bufA;
  PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
  MPI_Request            *rwaits = NULL,*swaits = NULL;
  MPI_Status             *sstatus,rstatus;
  PetscMPIInt            jj,size;
  PetscInt               *cols,sbs,rbs;
  PetscScalar            *vals;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  gen_to   = (VecScatter_MPI_General*)ctx->todata;
  gen_from = (VecScatter_MPI_General*)ctx->fromdata;
  rvalues  = gen_from->values; /* holds the length of receiving row */
  svalues  = gen_to->values;   /* holds the length of sending row */
  nrecvs   = gen_from->n;
  nsends   = gen_to->n;

  ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
  srow    = gen_to->indices;   /* local row index to be sent */
  sstarts = gen_to->starts;
  sprocs  = gen_to->procs;
  sstatus = gen_to->sstatus;
  sbs     = gen_to->bs;
  rstarts = gen_from->starts;
  rprocs  = gen_from->procs;
  rbs     = gen_from->bs;

  if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
  if (scall == MAT_INITIAL_MATRIX) {
    /* i-array */
    /*---------*/
    /* post receives */
    for (i=0; i<nrecvs; i++) {
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
      ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message */
    ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);

    sstartsj[0] = 0;
    rstartsj[0] = 0;
    len         = 0; /* total length of j or a array to be sent */
    k           = 0;
    for (i=0; i<nsends; i++) {
      rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
      nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
      for (j=0; j<nrows; j++) {
        row = srow[k] + B->rmap->range[rank]; /* global row idx */
        for (l=0; l<sbs; l++) {
          ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */

          rowlen[j*sbs+l] = ncols;

          len += ncols;
          ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
        }
        k++;
      }
      ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
      sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}

    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth */
    ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr);

    b_othi[0] = 0;
    len       = 0; /* total length of j or a array to be received */
    k         = 0;
    for (i=0; i<nrecvs; i++) {
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        len += rowlen[j]; k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }

    /* allocate space for j and a arrays of B_oth */
    ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /* post receives of j-array */
    for (i=0; i<nrecvs; i++) {
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message j-array */
    k = 0;
    for (i=0; i<nsends; i++) {
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap->range[rank]; /* global row idx */
        for (ll=0; ll<sbs; ll++) {
          ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
          for (l=0; l<ncols; l++) {
            *bufJ++ = cols[l];
          }
          ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
        }
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX) {
    sstartsj = *startsj_s;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");

  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++) {
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }

  /* pack the outgoing message a-array */
  k = 0;
  for (i=0; i<nsends; i++) {
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap->range[rank]; /* global row idx */
      for (ll=0; ll<sbs; ll++) {
        ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
        for (l=0; l<ncols; l++) {
          *bufA++ = vals[l];
        }
        ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
      }
    }
    ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
  }
  /* recvs and sends of a-array are completed */
  i = nrecvs;
  while (i--) {
    ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
  }
  if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX) {
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
    b_oth->free_a  = PETSC_TRUE;
    b_oth->free_ij = PETSC_TRUE;
    b_oth->nonew   = 0;

    ierr = PetscFree(bufj);CHKERRQ(ierr);
    if (!startsj_s || !bufa_ptr) {
      ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
    } else {
      *startsj_s = sstartsj;
      *startsj_r = rstartsj;
      *bufa_ptr  = bufa;
    }
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
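
/*
   A sketch of the reuse pattern for MatGetBrowsOfAoCols_MPIAIJ() (illustrative only;
   the variables below are assumptions).  The first call builds B_oth and saves the
   message layout; the second call only re-communicates the numerical values:

      PetscInt  *startsj_s = NULL,*startsj_r = NULL;
      MatScalar *bufa      = NULL;
      Mat       B_oth;
      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
      (change the numerical values of B, keeping its nonzero pattern)
      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
      ierr = PetscFree2(startsj_s,startsj_r);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
      ierr = MatDestroy(&B_oth);CHKERRQ(ierr);
*/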
#undef __FUNCT__
#define __FUNCT__ "MatGetCommunicationStructs"
/*@C
  MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.

  Not Collective

  Input Parameters:
. A - The matrix in mpiaij format

  Output Parameter:
+ lvec - The local vector holding off-process values from the argument to a matrix-vector product
. colmap - A map from global column index to local index into lvec
- multScatter - A scatter from the argument of a matrix-vector product to lvec

  Level: developer

@*/
#if defined(PETSC_USE_CTABLE)
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
#else
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
#endif
{
  Mat_MPIAIJ *a;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidPointer(lvec, 2);
  PetscValidPointer(colmap, 3);
  PetscValidPointer(multScatter, 4);
  a = (Mat_MPIAIJ*) A->data;
  if (lvec) *lvec = a->lvec;
  if (colmap) *colmap = a->colmap;
  if (multScatter) *multScatter = a->Mvctx;
  PetscFunctionReturn(0);
}

PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
#endif

#undef __FUNCT__
#define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
/*
    Computes (B'*A')' since computing B*A directly is untenable

               n                       p                          p
        (              )            (              )         (              )
      m (      A       )    *     n (      B       )   =   m (      C       )
        (              )            (              )         (              )

*/
PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat            At,Bt,Ct;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
  ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
  ierr = MatDestroy(&At);CHKERRQ(ierr);
  ierr = MatDestroy(&Bt);CHKERRQ(ierr);
  ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(&Ct);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m=A->rmap->n,n=B->cmap->n;
  Mat            Cmat;

  PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;

  *C = Cmat;
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/
#undef __FUNCT__
#define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
    ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateAIJ()
M*/
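
/*
   A minimal creation sketch for a MATMPIAIJ matrix (illustrative only; the global sizes
   M and N and the preallocation counts are assumptions):

      Mat A;
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);                   (or -mat_type mpiaij with MatSetFromOptions())
      ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
      (MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd(), ...)
*/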
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);

  ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data       = (void*)b;
  ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSP/CUSPARSE classes */
  b->spptr = NULL;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
/*@C
     MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
         and "off-diagonal" part of the matrix in CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices for "diagonal" portion of matrix
.  j - column indices
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of matrix
.  oj - column indices
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
       must free the arrays once the matrix has been destroyed and not before.

       The i and j indices are 0 based

       See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix

       This sets local rows and cannot be used to set off-processor values.

       Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
       legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
       not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
       the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
       keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
       communication if it is known that only local entries will be set.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
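
/*
   A tiny sketch of MatCreateMPIAIJWithSplitArrays() with one local row and one local
   column per process (illustrative only; the CSR arrays below are assumptions).  The
   "diagonal" block has a single entry and the "off-diagonal" block is left empty:

      PetscInt    i[]  = {0,1},j[]  = {0};
      PetscScalar a[]  = {2.0};
      PetscInt    oi[] = {0,0},oj[] = {0};
      PetscScalar oa[] = {0.0};
      Mat         A;
      ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
      (the arrays must remain valid until A has been destroyed)
      ierr = MatDestroy(&A);CHKERRQ(ierr);
*/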

/*
    Special version for direct calls from Fortran
*/
#include <petsc-private/fortranimpl.h>

#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif

/* Change these macros so they can be used in void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ3
#define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)

#undef __FUNCT__
#define __FUNCT__ "matsetvaluesmpiaij_"
PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;

#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
#endif
  {
    PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat        A     = aij->A;
    Mat_SeqAIJ *a    = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa   = a->a;
    PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B     = aij->B;
    Mat_SeqAIJ *b    = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba   = b->a;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
          if (in[j] >= cstart && in[j] < cend) {
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
          else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined(PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
                ba    = b->a;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          }
        }
      } else if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}