1 2 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 3 #include <petsc-private/vecimpl.h> 4 #include <petscblaslapack.h> 5 #include <petscsf.h> 6 7 /*MC 8 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 9 10 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 11 and MATMPIAIJ otherwise. As a result, for single process communicators, 12 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 13 for communicators controlling multiple processes. It is recommended that you call both of 14 the above preallocation routines for simplicity. 15 16 Options Database Keys: 17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 18 19 Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when 20 enough exist. 21 22 Level: beginner 23 24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ 25 M*/ 26 27 /*MC 28 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 29 30 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 31 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 32 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 33 for communicators controlling multiple processes. It is recommended that you call both of 34 the above preallocation routines for simplicity. 35 36 Options Database Keys: 37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 38 39 Level: beginner 40 41 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 42 M*/ 43 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ" 46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 47 { 48 PetscErrorCode ierr; 49 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 50 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 51 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 52 const PetscInt *ia,*ib; 53 const MatScalar *aa,*bb; 54 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 55 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 56 57 PetscFunctionBegin; 58 *keptrows = 0; 59 ia = a->i; 60 ib = b->i; 61 for (i=0; i<m; i++) { 62 na = ia[i+1] - ia[i]; 63 nb = ib[i+1] - ib[i]; 64 if (!na && !nb) { 65 cnt++; 66 goto ok1; 67 } 68 aa = a->a + ia[i]; 69 for (j=0; j<na; j++) { 70 if (aa[j] != 0.0) goto ok1; 71 } 72 bb = b->a + ib[i]; 73 for (j=0; j <nb; j++) { 74 if (bb[j] != 0.0) goto ok1; 75 } 76 cnt++; 77 ok1:; 78 } 79 ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 80 if (!n0rows) PetscFunctionReturn(0); 81 ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr); 82 cnt = 0; 83 for (i=0; i<m; i++) { 84 na = ia[i+1] - ia[i]; 85 nb = ib[i+1] - ib[i]; 86 if (!na && !nb) continue; 87 aa = a->a + ia[i]; 88 for (j=0; j<na;j++) { 89 if (aa[j] != 0.0) { 90 rows[cnt++] = rstart + i; 91 goto ok2; 92 } 93 } 94 bb = b->a + ib[i]; 95 for (j=0; j<nb; j++) { 96 if (bb[j] != 0.0) { 97 rows[cnt++] = rstart + i; 98 goto ok2; 99 } 100 } 101 ok2:; 102 } 103 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 104 PetscFunctionReturn(0); 105 } 106 107 #undef __FUNCT__ 108 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ" 109 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 110 { 111 Mat_MPIAIJ *aij = 
(Mat_MPIAIJ*)M->data; 112 PetscErrorCode ierr; 113 PetscInt i,rstart,nrows,*rows; 114 115 PetscFunctionBegin; 116 *zrows = NULL; 117 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 118 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 119 for (i=0; i<nrows; i++) rows[i] += rstart; 120 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 121 PetscFunctionReturn(0); 122 } 123 124 #undef __FUNCT__ 125 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 126 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 127 { 128 PetscErrorCode ierr; 129 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 130 PetscInt i,n,*garray = aij->garray; 131 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 132 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 133 PetscReal *work; 134 135 PetscFunctionBegin; 136 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 137 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 138 if (type == NORM_2) { 139 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 140 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 141 } 142 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 143 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 144 } 145 } else if (type == NORM_1) { 146 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 147 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 148 } 149 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 150 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 151 } 152 } else if (type == NORM_INFINITY) { 153 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 154 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 155 } 156 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 157 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 158 } 159 160 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 161 if (type == NORM_INFINITY) { 162 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 163 } else { 164 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 165 } 166 ierr = PetscFree(work);CHKERRQ(ierr); 167 if (type == NORM_2) { 168 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 169 } 170 PetscFunctionReturn(0); 171 } 172 173 #undef __FUNCT__ 174 #define __FUNCT__ "MatDistribute_MPIAIJ" 175 /* 176 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 177 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 
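   With MAT_INITIAL_MATRIX, process 0 (which holds the global SeqAIJ matrix) sends every other
   process its row lengths, column indices, and numerical values with point-to-point messages,
   and each process preallocates and fills its local diagonal (A) and off-diagonal (B) blocks.
   With MAT_REUSE_MATRIX only the numerical values are re-sent and copied straight into the
   existing storage of those blocks.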
178 179 Only for square matrices 180 181 Used by a preconditioner, hence PETSC_EXTERN 182 */ 183 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 184 { 185 PetscMPIInt rank,size; 186 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 187 PetscErrorCode ierr; 188 Mat mat; 189 Mat_SeqAIJ *gmata; 190 PetscMPIInt tag; 191 MPI_Status status; 192 PetscBool aij; 193 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 194 195 PetscFunctionBegin; 196 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 197 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 198 if (!rank) { 199 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 200 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 201 } 202 if (reuse == MAT_INITIAL_MATRIX) { 203 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 204 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 205 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 206 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 207 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 208 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 209 ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr); 210 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 211 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 212 213 rowners[0] = 0; 214 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 215 rstart = rowners[rank]; 216 rend = rowners[rank+1]; 217 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 218 if (!rank) { 219 gmata = (Mat_SeqAIJ*) gmat->data; 220 /* send row lengths to all processors */ 221 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 222 for (i=1; i<size; i++) { 223 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 224 } 225 /* determine number diagonal and off-diagonal counts */ 226 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 227 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 228 jj = 0; 229 for (i=0; i<m; i++) { 230 for (j=0; j<dlens[i]; j++) { 231 if (gmata->j[jj] < rstart) ld[i]++; 232 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 233 jj++; 234 } 235 } 236 /* send column indices to other processes */ 237 for (i=1; i<size; i++) { 238 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 239 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 240 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 241 } 242 243 /* send numerical values to other processes */ 244 for (i=1; i<size; i++) { 245 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 246 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 247 } 248 gmataa = gmata->a; 249 gmataj = gmata->j; 250 251 } else { 252 /* receive row lengths */ 253 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 254 /* receive column indices */ 255 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 256 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 257 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 258 /* determine number diagonal and off-diagonal counts */ 259 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 260 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 261 jj = 0; 262 for (i=0; i<m; i++) { 263 for (j=0; j<dlens[i]; j++) { 264 if 
(gmataj[jj] < rstart) ld[i]++; 265 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 266 jj++; 267 } 268 } 269 /* receive numerical values */ 270 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 271 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 272 } 273 /* set preallocation */ 274 for (i=0; i<m; i++) { 275 dlens[i] -= olens[i]; 276 } 277 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 278 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 279 280 for (i=0; i<m; i++) { 281 dlens[i] += olens[i]; 282 } 283 cnt = 0; 284 for (i=0; i<m; i++) { 285 row = rstart + i; 286 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 287 cnt += dlens[i]; 288 } 289 if (rank) { 290 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 291 } 292 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 293 ierr = PetscFree(rowners);CHKERRQ(ierr); 294 295 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 296 297 *inmat = mat; 298 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 299 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 300 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 301 mat = *inmat; 302 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 303 if (!rank) { 304 /* send numerical values to other processes */ 305 gmata = (Mat_SeqAIJ*) gmat->data; 306 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 307 gmataa = gmata->a; 308 for (i=1; i<size; i++) { 309 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 310 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 311 } 312 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 313 } else { 314 /* receive numerical values from process 0*/ 315 nz = Ad->nz + Ao->nz; 316 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 317 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 318 } 319 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 320 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 321 ad = Ad->a; 322 ao = Ao->a; 323 if (mat->rmap->n) { 324 i = 0; 325 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 326 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 327 } 328 for (i=1; i<mat->rmap->n; i++) { 329 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 330 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 331 } 332 i--; 333 if (mat->rmap->n) { 334 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 335 } 336 if (rank) { 337 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 338 } 339 } 340 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 341 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 342 PetscFunctionReturn(0); 343 } 344 345 /* 346 Local utility routine that creates a mapping from the global column 347 number to the local number in the off-diagonal part of the local 348 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 349 a slightly higher hash table cost; without it it is not scalable (each processor 350 has an order N integer array but is fast to acess. 
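   Here garray[] maps a local column index of the off-diagonal block B to its global column
   number; colmap is its inverse, mapping a global column number to the local index plus one,
   so that a lookup result of zero means the column is not present in the local off-diagonal block.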
351 */ 352 #undef __FUNCT__ 353 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 354 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 355 { 356 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 357 PetscErrorCode ierr; 358 PetscInt n = aij->B->cmap->n,i; 359 360 PetscFunctionBegin; 361 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 362 #if defined(PETSC_USE_CTABLE) 363 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 364 for (i=0; i<n; i++) { 365 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 366 } 367 #else 368 ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr); 369 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 370 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 371 #endif 372 PetscFunctionReturn(0); 373 } 374 375 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 376 { \ 377 if (col <= lastcol1) low1 = 0; \ 378 else high1 = nrow1; \ 379 lastcol1 = col;\ 380 while (high1-low1 > 5) { \ 381 t = (low1+high1)/2; \ 382 if (rp1[t] > col) high1 = t; \ 383 else low1 = t; \ 384 } \ 385 for (_i=low1; _i<high1; _i++) { \ 386 if (rp1[_i] > col) break; \ 387 if (rp1[_i] == col) { \ 388 if (addv == ADD_VALUES) ap1[_i] += value; \ 389 else ap1[_i] = value; \ 390 goto a_noinsert; \ 391 } \ 392 } \ 393 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 394 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 395 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 396 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 397 N = nrow1++ - 1; a->nz++; high1++; \ 398 /* shift up all the later entries in this row */ \ 399 for (ii=N; ii>=_i; ii--) { \ 400 rp1[ii+1] = rp1[ii]; \ 401 ap1[ii+1] = ap1[ii]; \ 402 } \ 403 rp1[_i] = col; \ 404 ap1[_i] = value; \ 405 A->nonzerostate++;\ 406 a_noinsert: ; \ 407 ailen[row] = nrow1; \ 408 } 409 410 411 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 412 { \ 413 if (col <= lastcol2) low2 = 0; \ 414 else high2 = nrow2; \ 415 lastcol2 = col; \ 416 while (high2-low2 > 5) { \ 417 t = (low2+high2)/2; \ 418 if (rp2[t] > col) high2 = t; \ 419 else low2 = t; \ 420 } \ 421 for (_i=low2; _i<high2; _i++) { \ 422 if (rp2[_i] > col) break; \ 423 if (rp2[_i] == col) { \ 424 if (addv == ADD_VALUES) ap2[_i] += value; \ 425 else ap2[_i] = value; \ 426 goto b_noinsert; \ 427 } \ 428 } \ 429 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 430 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 431 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 432 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 433 N = nrow2++ - 1; b->nz++; high2++; \ 434 /* shift up all the later entries in this row */ \ 435 for (ii=N; ii>=_i; ii--) { \ 436 rp2[ii+1] = rp2[ii]; \ 437 ap2[ii+1] = ap2[ii]; \ 438 } \ 439 rp2[_i] = col; \ 440 ap2[_i] = value; \ 441 B->nonzerostate++; \ 442 b_noinsert: ; \ 443 bilen[row] = nrow2; \ 444 } 445 446 #undef __FUNCT__ 447 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 448 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 449 { 450 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 451 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = 
(Mat_SeqAIJ*)mat->B->data; 452 PetscErrorCode ierr; 453 PetscInt l,*garray = mat->garray,diag; 454 455 PetscFunctionBegin; 456 /* code only works for square matrices A */ 457 458 /* find size of row to the left of the diagonal part */ 459 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 460 row = row - diag; 461 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 462 if (garray[b->j[b->i[row]+l]] > diag) break; 463 } 464 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 465 466 /* diagonal part */ 467 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 468 469 /* right of diagonal part */ 470 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 471 PetscFunctionReturn(0); 472 } 473 474 #undef __FUNCT__ 475 #define __FUNCT__ "MatSetValues_MPIAIJ" 476 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 477 { 478 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 479 PetscScalar value; 480 PetscErrorCode ierr; 481 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 482 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 483 PetscBool roworiented = aij->roworiented; 484 485 /* Some Variables required in the macro */ 486 Mat A = aij->A; 487 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 488 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 489 MatScalar *aa = a->a; 490 PetscBool ignorezeroentries = a->ignorezeroentries; 491 Mat B = aij->B; 492 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 493 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 494 MatScalar *ba = b->a; 495 496 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 497 PetscInt nonew; 498 MatScalar *ap1,*ap2; 499 500 PetscFunctionBegin; 501 if (v) PetscValidScalarPointer(v,6); 502 for (i=0; i<m; i++) { 503 if (im[i] < 0) continue; 504 #if defined(PETSC_USE_DEBUG) 505 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 506 #endif 507 if (im[i] >= rstart && im[i] < rend) { 508 row = im[i] - rstart; 509 lastcol1 = -1; 510 rp1 = aj + ai[row]; 511 ap1 = aa + ai[row]; 512 rmax1 = aimax[row]; 513 nrow1 = ailen[row]; 514 low1 = 0; 515 high1 = nrow1; 516 lastcol2 = -1; 517 rp2 = bj + bi[row]; 518 ap2 = ba + bi[row]; 519 rmax2 = bimax[row]; 520 nrow2 = bilen[row]; 521 low2 = 0; 522 high2 = nrow2; 523 524 for (j=0; j<n; j++) { 525 if (v) { 526 if (roworiented) value = v[i*n+j]; 527 else value = v[i+j*m]; 528 } else value = 0.0; 529 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 530 if (in[j] >= cstart && in[j] < cend) { 531 col = in[j] - cstart; 532 nonew = a->nonew; 533 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 534 } else if (in[j] < 0) continue; 535 #if defined(PETSC_USE_DEBUG) 536 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 537 #endif 538 else { 539 if (mat->was_assembled) { 540 if (!aij->colmap) { 541 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 542 } 543 #if defined(PETSC_USE_CTABLE) 544 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 545 col--; 546 #else 547 col = aij->colmap[in[j]] - 1; 548 #endif 549 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 550 ierr 
= MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 551 col = in[j]; 552 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 553 B = aij->B; 554 b = (Mat_SeqAIJ*)B->data; 555 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 556 rp2 = bj + bi[row]; 557 ap2 = ba + bi[row]; 558 rmax2 = bimax[row]; 559 nrow2 = bilen[row]; 560 low2 = 0; 561 high2 = nrow2; 562 bm = aij->B->rmap->n; 563 ba = b->a; 564 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 565 } else col = in[j]; 566 nonew = b->nonew; 567 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 568 } 569 } 570 } else { 571 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 572 if (!aij->donotstash) { 573 mat->assembled = PETSC_FALSE; 574 if (roworiented) { 575 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 576 } else { 577 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 578 } 579 } 580 } 581 } 582 PetscFunctionReturn(0); 583 } 584 585 #undef __FUNCT__ 586 #define __FUNCT__ "MatGetValues_MPIAIJ" 587 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 588 { 589 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 590 PetscErrorCode ierr; 591 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 592 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 593 594 PetscFunctionBegin; 595 for (i=0; i<m; i++) { 596 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 597 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 598 if (idxm[i] >= rstart && idxm[i] < rend) { 599 row = idxm[i] - rstart; 600 for (j=0; j<n; j++) { 601 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 602 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 603 if (idxn[j] >= cstart && idxn[j] < cend) { 604 col = idxn[j] - cstart; 605 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 606 } else { 607 if (!aij->colmap) { 608 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 609 } 610 #if defined(PETSC_USE_CTABLE) 611 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 612 col--; 613 #else 614 col = aij->colmap[idxn[j]] - 1; 615 #endif 616 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 617 else { 618 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 619 } 620 } 621 } 622 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 623 } 624 PetscFunctionReturn(0); 625 } 626 627 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 628 629 #undef __FUNCT__ 630 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 631 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 632 { 633 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 634 PetscErrorCode ierr; 635 PetscInt nstash,reallocs; 636 InsertMode addv; 637 638 PetscFunctionBegin; 639 if (aij->donotstash || mat->nooffprocentries) 
PetscFunctionReturn(0);

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble.
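     The allreduce below multiplies the was_assembled flags together, so it returns PETSC_FALSE
     on every process as soon as any one process has disassembled; each process that is still
     assembled then calls MatDisAssemble_MPIAIJ() so the off-diagonal structure can be rebuilt
     consistently everywhere.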
*/ 694 /* 695 if nonzero structure of submatrix B cannot change then we know that 696 no processor disassembled thus we can skip this stuff 697 */ 698 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 699 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 700 if (mat->was_assembled && !other_disassembled) { 701 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 702 } 703 } 704 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 705 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 706 } 707 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 708 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 709 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 710 711 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 712 713 aij->rowvalues = 0; 714 715 /* used by MatAXPY() */ 716 a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 717 a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 718 719 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 720 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 721 722 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 723 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 724 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 725 ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 726 } 727 PetscFunctionReturn(0); 728 } 729 730 #undef __FUNCT__ 731 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 732 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 733 { 734 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 735 PetscErrorCode ierr; 736 737 PetscFunctionBegin; 738 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 739 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 740 PetscFunctionReturn(0); 741 } 742 743 #undef __FUNCT__ 744 #define __FUNCT__ "MatZeroRows_MPIAIJ" 745 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 746 { 747 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 748 PetscInt *owners = A->rmap->range; 749 PetscInt n = A->rmap->n; 750 PetscMPIInt size = mat->size; 751 PetscSF sf; 752 PetscInt *lrows; 753 PetscSFNode *rrows; 754 PetscInt lastidx = -1, r, p = 0, len = 0; 755 PetscErrorCode ierr; 756 757 PetscFunctionBegin; 758 /* Create SF where leaves are input rows and roots are owned rows */ 759 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 760 for (r = 0; r < n; ++r) lrows[r] = -1; 761 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 762 for (r = 0; r < N; ++r) { 763 const PetscInt idx = rows[r]; 764 PetscBool found = PETSC_FALSE; 765 /* Trick for efficient searching for sorted rows */ 766 if (lastidx > idx) p = 0; 767 lastidx = idx; 768 for (; p < size; ++p) { 769 if (idx >= owners[p] && idx < owners[p+1]) { 770 rrows[r].rank = p; 771 rrows[r].index = rows[r] - owners[p]; 772 found = PETSC_TRUE; 773 break; 774 } 775 } 776 if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx); 777 } 778 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 779 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 780 /* Collect flags for rows to be zeroed */ 781 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 782 ierr = PetscSFReduceEnd(sf, 
MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 783 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 784 /* Compress and put in row numbers */ 785 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 786 /* fix right hand side if needed */ 787 if (x && b) { 788 const PetscScalar *xx; 789 PetscScalar *bb; 790 791 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 792 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 793 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 794 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 795 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 796 } 797 /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/ 798 ierr = MatZeroRows(mat->B, len, lrows, 0.0, 0,0);CHKERRQ(ierr); 799 if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) { 800 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 801 } else if (diag != 0.0) { 802 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 803 if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 804 for (r = 0; r < len; ++r) { 805 const PetscInt row = lrows[r] + A->rmap->rstart; 806 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 807 } 808 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 809 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 810 } else { 811 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 812 } 813 ierr = PetscFree(lrows);CHKERRQ(ierr); 814 815 /* only change matrix nonzero state if pattern was allowed to be changed */ 816 if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) { 817 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 818 ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 819 } 820 PetscFunctionReturn(0); 821 } 822 823 #undef __FUNCT__ 824 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 825 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 826 { 827 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 828 PetscErrorCode ierr; 829 PetscMPIInt size = l->size,n = A->rmap->n,lastidx = -1; 830 PetscInt i,j,r,m,p = 0,len = 0; 831 PetscInt *lrows,*owners = A->rmap->range; 832 PetscSFNode *rrows; 833 PetscSF sf; 834 const PetscScalar *xx; 835 PetscScalar *bb,*mask; 836 Vec xmask,lmask; 837 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 838 const PetscInt *aj, *ii,*ridx; 839 PetscScalar *aa; 840 #if defined(PETSC_DEBUG) 841 PetscBool found = PETSC_FALSE; 842 #endif 843 844 PetscFunctionBegin; 845 /* Create SF where leaves are input rows and roots are owned rows */ 846 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 847 for (r = 0; r < n; ++r) lrows[r] = -1; 848 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 849 for (r = 0; r < N; ++r) { 850 const PetscInt idx = rows[r]; 851 PetscBool found = PETSC_FALSE; 852 /* Trick for efficient searching for sorted rows */ 853 if (lastidx > idx) p = 0; 854 lastidx = idx; 855 for (; p < size; ++p) { 856 if (idx >= owners[p] && idx < owners[p+1]) { 857 rrows[r].rank = p; 858 rrows[r].index = rows[r] - owners[p]; 859 found = PETSC_TRUE; 860 break; 861 } 862 } 863 if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx); 864 } 865 ierr = 
PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 866 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 867 /* Collect flags for rows to be zeroed */ 868 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 869 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 870 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 871 /* Compress and put in row numbers */ 872 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 873 /* zero diagonal part of matrix */ 874 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 875 /* handle off diagonal part of matrix */ 876 ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr); 877 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 878 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 879 for (i=0; i<len; i++) bb[lrows[i]] = 1; 880 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 881 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 882 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 883 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 884 if (x) { 885 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 886 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 887 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 888 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 889 } 890 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 891 /* remove zeroed rows of off diagonal matrix */ 892 ii = aij->i; 893 for (i=0; i<len; i++) { 894 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 895 } 896 /* loop over all elements of off process part of matrix zeroing removed columns*/ 897 if (aij->compressedrow.use) { 898 m = aij->compressedrow.nrows; 899 ii = aij->compressedrow.i; 900 ridx = aij->compressedrow.rindex; 901 for (i=0; i<m; i++) { 902 n = ii[i+1] - ii[i]; 903 aj = aij->j + ii[i]; 904 aa = aij->a + ii[i]; 905 906 for (j=0; j<n; j++) { 907 if (PetscAbsScalar(mask[*aj])) { 908 if (b) bb[*ridx] -= *aa*xx[*aj]; 909 *aa = 0.0; 910 } 911 aa++; 912 aj++; 913 } 914 ridx++; 915 } 916 } else { /* do not use compressed row format */ 917 m = l->B->rmap->n; 918 for (i=0; i<m; i++) { 919 n = ii[i+1] - ii[i]; 920 aj = aij->j + ii[i]; 921 aa = aij->a + ii[i]; 922 for (j=0; j<n; j++) { 923 if (PetscAbsScalar(mask[*aj])) { 924 if (b) bb[i] -= *aa*xx[*aj]; 925 *aa = 0.0; 926 } 927 aa++; 928 aj++; 929 } 930 } 931 } 932 if (x) { 933 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 934 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 935 } 936 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 937 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 938 ierr = PetscFree(lrows);CHKERRQ(ierr); 939 940 /* only change matrix nonzero state if pattern was allowed to be changed */ 941 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 942 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 943 ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 944 } 945 PetscFunctionReturn(0); 946 } 947 948 #undef __FUNCT__ 949 #define __FUNCT__ "MatMult_MPIAIJ" 950 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 951 { 952 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 953 PetscErrorCode ierr; 954 PetscInt nt; 955 956 PetscFunctionBegin; 957 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 958 if (nt != A->cmap->n) 
SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 959 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 960 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 961 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 962 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 963 PetscFunctionReturn(0); 964 } 965 966 #undef __FUNCT__ 967 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 968 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 969 { 970 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 971 PetscErrorCode ierr; 972 973 PetscFunctionBegin; 974 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 975 PetscFunctionReturn(0); 976 } 977 978 #undef __FUNCT__ 979 #define __FUNCT__ "MatMultAdd_MPIAIJ" 980 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 981 { 982 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 983 PetscErrorCode ierr; 984 985 PetscFunctionBegin; 986 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 987 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 988 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 989 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 990 PetscFunctionReturn(0); 991 } 992 993 #undef __FUNCT__ 994 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 995 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 996 { 997 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 998 PetscErrorCode ierr; 999 PetscBool merged; 1000 1001 PetscFunctionBegin; 1002 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1003 /* do nondiagonal part */ 1004 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1005 if (!merged) { 1006 /* send it on its way */ 1007 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1008 /* do local part */ 1009 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1010 /* receive remote parts: note this assumes the values are not actually */ 1011 /* added in yy until the next line, */ 1012 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1013 } else { 1014 /* do local part */ 1015 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1016 /* send it on its way */ 1017 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1018 /* values actually were received in the Begin() but we need to call this nop */ 1019 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1020 } 1021 PetscFunctionReturn(0); 1022 } 1023 1024 #undef __FUNCT__ 1025 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1026 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1027 { 1028 MPI_Comm comm; 1029 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1030 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1031 IS Me,Notme; 1032 PetscErrorCode ierr; 1033 PetscInt M,N,first,last,*notme,i; 1034 PetscMPIInt size; 1035 1036 PetscFunctionBegin; 1037 /* Easy test: symmetric diagonal block */ 1038 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1039 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1040 if (!*f) PetscFunctionReturn(0); 1041 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1042 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1043 if (size == 1) PetscFunctionReturn(0); 1044 1045 /* Hard test: off-diagonal block. 
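      Each process extracts its owned rows of Amat restricted to the columns it does not own,
      and the block of Bmat with those index sets swapped, then compares the two sequential
      blocks entry by entry with MatIsTranspose().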
This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc1((N-last+first),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr =
PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1133 #else 1134 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1135 #endif 1136 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1137 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1138 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1139 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1140 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1141 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1142 1143 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1144 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1145 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1146 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 1147 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1148 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1149 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1150 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1151 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1152 PetscFunctionReturn(0); 1153 } 1154 1155 #undef __FUNCT__ 1156 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1157 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1158 { 1159 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1160 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1161 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1162 PetscErrorCode ierr; 1163 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1164 int fd; 1165 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1166 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1167 PetscScalar *column_values; 1168 PetscInt message_count,flowcontrolcount; 1169 FILE *file; 1170 1171 PetscFunctionBegin; 1172 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1173 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1174 nz = A->nz + B->nz; 1175 if (!rank) { 1176 header[0] = MAT_FILE_CLASSID; 1177 header[1] = mat->rmap->N; 1178 header[2] = mat->cmap->N; 1179 1180 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1181 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1182 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1183 /* get largest number of rows any processor has */ 1184 rlen = mat->rmap->n; 1185 range = mat->rmap->range; 1186 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1187 } else { 1188 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1189 rlen = mat->rmap->n; 1190 } 1191 1192 /* load up the local row counts */ 1193 ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr); 1194 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1195 1196 /* store the row lengths to the file */ 1197 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1198 if (!rank) { 1199 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1200 for (i=1; i<size; i++) { 1201 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 
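      /* process 0 paces the workers with the flow-control viewer calls: after each step it receives process i's row lengths and appends them to the binary file */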
1202 rlen = range[i+1] - range[i]; 1203 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1204 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1205 } 1206 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1207 } else { 1208 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1209 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1210 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1211 } 1212 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1213 1214 /* load up the local column indices */ 1215 nzmax = nz; /* th processor needs space a largest processor needs */ 1216 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1217 ierr = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr); 1218 cnt = 0; 1219 for (i=0; i<mat->rmap->n; i++) { 1220 for (j=B->i[i]; j<B->i[i+1]; j++) { 1221 if ((col = garray[B->j[j]]) > cstart) break; 1222 column_indices[cnt++] = col; 1223 } 1224 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1225 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1226 } 1227 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1228 1229 /* store the column indices to the file */ 1230 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1231 if (!rank) { 1232 MPI_Status status; 1233 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1234 for (i=1; i<size; i++) { 1235 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1236 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1237 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1238 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1239 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1240 } 1241 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1242 } else { 1243 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1244 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1245 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1246 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1247 } 1248 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1249 1250 /* load up the local column values */ 1251 ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr); 1252 cnt = 0; 1253 for (i=0; i<mat->rmap->n; i++) { 1254 for (j=B->i[i]; j<B->i[i+1]; j++) { 1255 if (garray[B->j[j]] > cstart) break; 1256 column_values[cnt++] = B->a[j]; 1257 } 1258 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1259 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1260 } 1261 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1262 1263 /* store the column values to the file */ 1264 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1265 if (!rank) { 1266 MPI_Status status; 
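    /* process 0 writes its own values first, then receives and writes each worker's values under the same flow control; like the column indices above, the values are packed in global column order: off-diagonal (B) entries before the diagonal block, then diagonal (A) entries, then the remaining B entries */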
1267 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1268 for (i=1; i<size; i++) { 1269 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1270 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1271 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1272 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1273 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1274 } 1275 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1276 } else { 1277 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1278 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1279 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1280 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1281 } 1282 ierr = PetscFree(column_values);CHKERRQ(ierr); 1283 1284 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1285 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1286 PetscFunctionReturn(0); 1287 } 1288 1289 #include <petscdraw.h> 1290 #undef __FUNCT__ 1291 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1292 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1293 { 1294 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1295 PetscErrorCode ierr; 1296 PetscMPIInt rank = aij->rank,size = aij->size; 1297 PetscBool isdraw,iascii,isbinary; 1298 PetscViewer sviewer; 1299 PetscViewerFormat format; 1300 1301 PetscFunctionBegin; 1302 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1303 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1304 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1305 if (iascii) { 1306 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1307 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1308 MatInfo info; 1309 PetscBool inodes; 1310 1311 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1312 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1313 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1314 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1315 if (!inodes) { 1316 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1317 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1318 } else { 1319 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1320 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1321 } 1322 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1323 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1324 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1325 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1326 ierr = 
PetscViewerFlush(viewer);CHKERRQ(ierr); 1327 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1328 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1329 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1330 PetscFunctionReturn(0); 1331 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1332 PetscInt inodecount,inodelimit,*inodes; 1333 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1334 if (inodes) { 1335 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1336 } else { 1337 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1338 } 1339 PetscFunctionReturn(0); 1340 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1341 PetscFunctionReturn(0); 1342 } 1343 } else if (isbinary) { 1344 if (size == 1) { 1345 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1346 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1347 } else { 1348 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1349 } 1350 PetscFunctionReturn(0); 1351 } else if (isdraw) { 1352 PetscDraw draw; 1353 PetscBool isnull; 1354 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1355 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1356 } 1357 1358 { 1359 /* assemble the entire matrix onto first processor. */ 1360 Mat A; 1361 Mat_SeqAIJ *Aloc; 1362 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1363 MatScalar *a; 1364 1365 if (mat->rmap->N > 1024) { 1366 PetscBool flg = PETSC_FALSE; 1367 1368 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr); 1369 if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1370 } 1371 1372 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1373 if (!rank) { 1374 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1375 } else { 1376 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1377 } 1378 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1379 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1380 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1381 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1382 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1383 1384 /* copy over the A part */ 1385 Aloc = (Mat_SeqAIJ*)aij->A->data; 1386 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1387 row = mat->rmap->rstart; 1388 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1389 for (i=0; i<m; i++) { 1390 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1391 row++; 1392 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1393 } 1394 aj = Aloc->j; 1395 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1396 1397 /* copy over the B part */ 1398 Aloc = (Mat_SeqAIJ*)aij->B->data; 1399 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1400 row = mat->rmap->rstart; 1401 ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr); 1402 ct = cols; 1403 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1404 for (i=0; i<m; i++) { 1405 ierr = 
MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1406 row++; 1407 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1408 } 1409 ierr = PetscFree(ct);CHKERRQ(ierr); 1410 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1411 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1412 /* 1413 Everyone has to call to draw the matrix since the graphics waits are 1414 synchronized across all processors that share the PetscDraw object 1415 */ 1416 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1417 if (!rank) { 1418 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1419 } 1420 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1421 ierr = MatDestroy(&A);CHKERRQ(ierr); 1422 } 1423 PetscFunctionReturn(0); 1424 } 1425 1426 #undef __FUNCT__ 1427 #define __FUNCT__ "MatView_MPIAIJ" 1428 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1429 { 1430 PetscErrorCode ierr; 1431 PetscBool iascii,isdraw,issocket,isbinary; 1432 1433 PetscFunctionBegin; 1434 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1435 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1436 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1437 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1438 if (iascii || isdraw || isbinary || issocket) { 1439 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1440 } 1441 PetscFunctionReturn(0); 1442 } 1443 1444 #undef __FUNCT__ 1445 #define __FUNCT__ "MatSOR_MPIAIJ" 1446 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1447 { 1448 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1449 PetscErrorCode ierr; 1450 Vec bb1 = 0; 1451 PetscBool hasop; 1452 1453 PetscFunctionBegin; 1454 if (flag == SOR_APPLY_UPPER) { 1455 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1456 PetscFunctionReturn(0); 1457 } 1458 1459 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1460 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1461 } 1462 1463 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1464 if (flag & SOR_ZERO_INITIAL_GUESS) { 1465 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1466 its--; 1467 } 1468 1469 while (its--) { 1470 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1471 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1472 1473 /* update rhs: bb1 = bb - B*x */ 1474 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1475 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1476 1477 /* local sweep */ 1478 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1479 } 1480 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1481 if (flag & SOR_ZERO_INITIAL_GUESS) { 1482 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1483 its--; 1484 } 1485 while (its--) { 1486 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1487 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1488 1489 /* update rhs: bb1 = bb - B*x */ 1490 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1491 ierr = 
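/*
   All SOR_LOCAL_* branches of MatSOR_MPIAIJ share this structure: the current iterate xx
   is scattered into the ghosted work vector mat->lvec, the right-hand side is corrected to
   bb1 = bb - B*lvec (the VecScale by -1.0 lets the multadd kernel form bb + B*(-lvec)),
   and the sequential SOR kernel of the diagonal block mat->A is then applied to bb1.
   Only these process-local sweeps are provided; any other request falls through to the
   "Parallel SOR not supported" error below.
*/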
(*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1492 1493 /* local sweep */ 1494 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1495 } 1496 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1497 if (flag & SOR_ZERO_INITIAL_GUESS) { 1498 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1499 its--; 1500 } 1501 while (its--) { 1502 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1503 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1504 1505 /* update rhs: bb1 = bb - B*x */ 1506 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1507 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1508 1509 /* local sweep */ 1510 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1511 } 1512 } else if (flag & SOR_EISENSTAT) { 1513 Vec xx1; 1514 1515 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1516 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1517 1518 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1519 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1520 if (!mat->diag) { 1521 ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1522 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1523 } 1524 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1525 if (hasop) { 1526 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1527 } else { 1528 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1529 } 1530 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1531 1532 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1533 1534 /* local sweep */ 1535 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1536 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1537 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1538 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1539 1540 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1541 PetscFunctionReturn(0); 1542 } 1543 1544 #undef __FUNCT__ 1545 #define __FUNCT__ "MatPermute_MPIAIJ" 1546 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1547 { 1548 Mat aA,aB,Aperm; 1549 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1550 PetscScalar *aa,*ba; 1551 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1552 PetscSF rowsf,sf; 1553 IS parcolp = NULL; 1554 PetscBool done; 1555 PetscErrorCode ierr; 1556 1557 PetscFunctionBegin; 1558 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1559 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1560 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1561 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1562 1563 /* Invert row permutation to find out where my rows should go */ 1564 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1565 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1566 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1567 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1568 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1569 ierr = 
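/*
   Sketch of the star-forest inversion used here (assuming the usual MatPermute convention
   that row i of the result is row rowp[i] of A): rwant[i] is the source row wanted at
   global position rstart+i, so reducing the identity indices work[i] = rstart+i onto the
   roots selected by rwant leaves in rdest[k] the destination row, in the permuted matrix,
   of local row k.  For a one-process permutation rowp = {2,0,1} this yields
   rdest = {1,2,0}.
*/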
PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1570 1571 /* Invert column permutation to find out where my columns should go */ 1572 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1573 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1574 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1575 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1576 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1577 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1578 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1579 1580 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1581 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1582 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1583 1584 /* Find out where my gcols should go */ 1585 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1586 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1587 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1588 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1589 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1590 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1591 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1592 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1593 1594 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1595 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1596 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1597 for (i=0; i<m; i++) { 1598 PetscInt row = rdest[i],rowner; 1599 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1600 for (j=ai[i]; j<ai[i+1]; j++) { 1601 PetscInt cowner,col = cdest[aj[j]]; 1602 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1603 if (rowner == cowner) dnnz[i]++; 1604 else onnz[i]++; 1605 } 1606 for (j=bi[i]; j<bi[i+1]; j++) { 1607 PetscInt cowner,col = gcdest[bj[j]]; 1608 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1609 if (rowner == cowner) dnnz[i]++; 1610 else onnz[i]++; 1611 } 1612 } 1613 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1614 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1615 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1616 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1617 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1618 1619 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1620 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1621 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1622 for (i=0; i<m; i++) { 1623 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1624 PetscInt j0,rowlen; 1625 rowlen = ai[i+1] - ai[i]; 1626 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1627 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1628 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1629 } 1630 rowlen = bi[i+1] - bi[i]; 1631 for (j0=j=0; j<rowlen; j0=j) { 1632 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1633 ierr = 
MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1634 } 1635 } 1636 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1637 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1638 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1639 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1640 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1641 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1642 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1643 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1644 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1645 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1646 *B = Aperm; 1647 PetscFunctionReturn(0); 1648 } 1649 1650 #undef __FUNCT__ 1651 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1652 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1653 { 1654 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1655 Mat A = mat->A,B = mat->B; 1656 PetscErrorCode ierr; 1657 PetscReal isend[5],irecv[5]; 1658 1659 PetscFunctionBegin; 1660 info->block_size = 1.0; 1661 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1662 1663 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1664 isend[3] = info->memory; isend[4] = info->mallocs; 1665 1666 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1667 1668 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1669 isend[3] += info->memory; isend[4] += info->mallocs; 1670 if (flag == MAT_LOCAL) { 1671 info->nz_used = isend[0]; 1672 info->nz_allocated = isend[1]; 1673 info->nz_unneeded = isend[2]; 1674 info->memory = isend[3]; 1675 info->mallocs = isend[4]; 1676 } else if (flag == MAT_GLOBAL_MAX) { 1677 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1678 1679 info->nz_used = irecv[0]; 1680 info->nz_allocated = irecv[1]; 1681 info->nz_unneeded = irecv[2]; 1682 info->memory = irecv[3]; 1683 info->mallocs = irecv[4]; 1684 } else if (flag == MAT_GLOBAL_SUM) { 1685 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1686 1687 info->nz_used = irecv[0]; 1688 info->nz_allocated = irecv[1]; 1689 info->nz_unneeded = irecv[2]; 1690 info->memory = irecv[3]; 1691 info->mallocs = irecv[4]; 1692 } 1693 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1694 info->fill_ratio_needed = 0; 1695 info->factor_mallocs = 0; 1696 PetscFunctionReturn(0); 1697 } 1698 1699 #undef __FUNCT__ 1700 #define __FUNCT__ "MatSetOption_MPIAIJ" 1701 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1702 { 1703 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1704 PetscErrorCode ierr; 1705 1706 PetscFunctionBegin; 1707 switch (op) { 1708 case MAT_NEW_NONZERO_LOCATIONS: 1709 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1710 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1711 case MAT_KEEP_NONZERO_PATTERN: 1712 case MAT_NEW_NONZERO_LOCATION_ERR: 1713 case MAT_USE_INODES: 1714 case MAT_IGNORE_ZERO_ENTRIES: 1715 MatCheckPreallocated(A,1); 1716 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1717 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1718 break; 1719 case MAT_ROW_ORIENTED: 1720 a->roworiented = flg; 1721 1722 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1723 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1724 break; 1725 case MAT_NEW_DIAGONALS: 1726 ierr = PetscInfo1(A,"Option %s 
ignored\n",MatOptions[op]);CHKERRQ(ierr); 1727 break; 1728 case MAT_IGNORE_OFF_PROC_ENTRIES: 1729 a->donotstash = flg; 1730 break; 1731 case MAT_SPD: 1732 A->spd_set = PETSC_TRUE; 1733 A->spd = flg; 1734 if (flg) { 1735 A->symmetric = PETSC_TRUE; 1736 A->structurally_symmetric = PETSC_TRUE; 1737 A->symmetric_set = PETSC_TRUE; 1738 A->structurally_symmetric_set = PETSC_TRUE; 1739 } 1740 break; 1741 case MAT_SYMMETRIC: 1742 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1743 break; 1744 case MAT_STRUCTURALLY_SYMMETRIC: 1745 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1746 break; 1747 case MAT_HERMITIAN: 1748 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1749 break; 1750 case MAT_SYMMETRY_ETERNAL: 1751 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1752 break; 1753 default: 1754 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1755 } 1756 PetscFunctionReturn(0); 1757 } 1758 1759 #undef __FUNCT__ 1760 #define __FUNCT__ "MatGetRow_MPIAIJ" 1761 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1762 { 1763 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1764 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1765 PetscErrorCode ierr; 1766 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1767 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1768 PetscInt *cmap,*idx_p; 1769 1770 PetscFunctionBegin; 1771 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1772 mat->getrowactive = PETSC_TRUE; 1773 1774 if (!mat->rowvalues && (idx || v)) { 1775 /* 1776 allocate enough space to hold information from the longest row. 1777 */ 1778 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1779 PetscInt max = 1,tmp; 1780 for (i=0; i<matin->rmap->n; i++) { 1781 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1782 if (max < tmp) max = tmp; 1783 } 1784 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1785 } 1786 1787 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1788 lrow = row - rstart; 1789 1790 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1791 if (!v) {pvA = 0; pvB = 0;} 1792 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1793 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1794 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1795 nztot = nzA + nzB; 1796 1797 cmap = mat->garray; 1798 if (v || idx) { 1799 if (nztot) { 1800 /* Sort by increasing column numbers, assuming A and B already sorted */ 1801 PetscInt imark = -1; 1802 if (v) { 1803 *v = v_p = mat->rowvalues; 1804 for (i=0; i<nzB; i++) { 1805 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1806 else break; 1807 } 1808 imark = i; 1809 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1810 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1811 } 1812 if (idx) { 1813 *idx = idx_p = mat->rowindices; 1814 if (imark > -1) { 1815 for (i=0; i<imark; i++) { 1816 idx_p[i] = cmap[cworkB[i]]; 1817 } 1818 } else { 1819 for (i=0; i<nzB; i++) { 1820 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1821 else break; 1822 } 1823 imark = i; 1824 } 1825 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1826 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1827 } 1828 } else { 1829 if (idx) *idx = 0; 1830 if (v) *v = 0; 1831 } 1832 } 1833 *nz = nztot; 1834 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1835 
ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1836 PetscFunctionReturn(0); 1837 } 1838 1839 #undef __FUNCT__ 1840 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1841 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1842 { 1843 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1844 1845 PetscFunctionBegin; 1846 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1847 aij->getrowactive = PETSC_FALSE; 1848 PetscFunctionReturn(0); 1849 } 1850 1851 #undef __FUNCT__ 1852 #define __FUNCT__ "MatNorm_MPIAIJ" 1853 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1854 { 1855 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1856 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1857 PetscErrorCode ierr; 1858 PetscInt i,j,cstart = mat->cmap->rstart; 1859 PetscReal sum = 0.0; 1860 MatScalar *v; 1861 1862 PetscFunctionBegin; 1863 if (aij->size == 1) { 1864 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1865 } else { 1866 if (type == NORM_FROBENIUS) { 1867 v = amat->a; 1868 for (i=0; i<amat->nz; i++) { 1869 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1870 } 1871 v = bmat->a; 1872 for (i=0; i<bmat->nz; i++) { 1873 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1874 } 1875 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1876 *norm = PetscSqrtReal(*norm); 1877 } else if (type == NORM_1) { /* max column norm */ 1878 PetscReal *tmp,*tmp2; 1879 PetscInt *jj,*garray = aij->garray; 1880 ierr = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr); 1881 ierr = PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr); 1882 *norm = 0.0; 1883 v = amat->a; jj = amat->j; 1884 for (j=0; j<amat->nz; j++) { 1885 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1886 } 1887 v = bmat->a; jj = bmat->j; 1888 for (j=0; j<bmat->nz; j++) { 1889 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1890 } 1891 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1892 for (j=0; j<mat->cmap->N; j++) { 1893 if (tmp2[j] > *norm) *norm = tmp2[j]; 1894 } 1895 ierr = PetscFree(tmp);CHKERRQ(ierr); 1896 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1897 } else if (type == NORM_INFINITY) { /* max row norm */ 1898 PetscReal ntemp = 0.0; 1899 for (j=0; j<aij->A->rmap->n; j++) { 1900 v = amat->a + amat->i[j]; 1901 sum = 0.0; 1902 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1903 sum += PetscAbsScalar(*v); v++; 1904 } 1905 v = bmat->a + bmat->i[j]; 1906 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1907 sum += PetscAbsScalar(*v); v++; 1908 } 1909 if (sum > ntemp) ntemp = sum; 1910 } 1911 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1912 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1913 } 1914 PetscFunctionReturn(0); 1915 } 1916 1917 #undef __FUNCT__ 1918 #define __FUNCT__ "MatTranspose_MPIAIJ" 1919 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1920 { 1921 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1922 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1923 PetscErrorCode ierr; 1924 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1925 PetscInt cstart = A->cmap->rstart,ncol; 1926 Mat B; 1927 MatScalar *array; 1928 1929 PetscFunctionBegin; 1930 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) 
SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1931 1932 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1933 ai = Aloc->i; aj = Aloc->j; 1934 bi = Bloc->i; bj = Bloc->j; 1935 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1936 PetscInt *d_nnz,*g_nnz,*o_nnz; 1937 PetscSFNode *oloc; 1938 PETSC_UNUSED PetscSF sf; 1939 1940 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1941 /* compute d_nnz for preallocation */ 1942 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1943 for (i=0; i<ai[ma]; i++) { 1944 d_nnz[aj[i]]++; 1945 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1946 } 1947 /* compute local off-diagonal contributions */ 1948 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1949 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1950 /* map those to global */ 1951 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1952 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1953 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1954 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1955 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1956 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1957 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1958 1959 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1960 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1961 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1962 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1963 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1964 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 1965 } else { 1966 B = *matout; 1967 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1968 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1969 } 1970 1971 /* copy over the A part */ 1972 array = Aloc->a; 1973 row = A->rmap->rstart; 1974 for (i=0; i<ma; i++) { 1975 ncol = ai[i+1]-ai[i]; 1976 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1977 row++; 1978 array += ncol; aj += ncol; 1979 } 1980 aj = Aloc->j; 1981 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1982 1983 /* copy over the B part */ 1984 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 1985 array = Bloc->a; 1986 row = A->rmap->rstart; 1987 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1988 cols_tmp = cols; 1989 for (i=0; i<mb; i++) { 1990 ncol = bi[i+1]-bi[i]; 1991 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1992 row++; 1993 array += ncol; cols_tmp += ncol; 1994 } 1995 ierr = PetscFree(cols);CHKERRQ(ierr); 1996 1997 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1998 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1999 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2000 *matout = B; 2001 } else { 2002 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2003 } 2004 PetscFunctionReturn(0); 2005 } 2006 2007 #undef __FUNCT__ 2008 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2009 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2010 { 2011 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2012 Mat a = aij->A,b = aij->B; 2013 PetscErrorCode ierr; 2014 PetscInt s1,s2,s3; 2015 2016 PetscFunctionBegin; 2017 ierr = 
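/*
   MatDiagonalScale_MPIAIJ forms mat <- diag(ll) * mat * diag(rr), with ll conforming to
   the local row layout and rr to the local column layout.  The scatter of rr into the
   ghosted vector aij->lvec is started first so that its communication overlaps with the
   left scaling of the off-diagonal block and the scaling of the diagonal block; the right
   scaling of the off-diagonal block waits until the scatter has completed.
*/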
MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2018 if (rr) { 2019 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2020 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2021 /* Overlap communication with computation. */ 2022 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2023 } 2024 if (ll) { 2025 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2026 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2027 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2028 } 2029 /* scale the diagonal block */ 2030 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2031 2032 if (rr) { 2033 /* Do a scatter end and then right scale the off-diagonal block */ 2034 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2035 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2036 } 2037 PetscFunctionReturn(0); 2038 } 2039 2040 #undef __FUNCT__ 2041 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2042 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2043 { 2044 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2045 PetscErrorCode ierr; 2046 2047 PetscFunctionBegin; 2048 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2049 PetscFunctionReturn(0); 2050 } 2051 2052 #undef __FUNCT__ 2053 #define __FUNCT__ "MatEqual_MPIAIJ" 2054 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2055 { 2056 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2057 Mat a,b,c,d; 2058 PetscBool flg; 2059 PetscErrorCode ierr; 2060 2061 PetscFunctionBegin; 2062 a = matA->A; b = matA->B; 2063 c = matB->A; d = matB->B; 2064 2065 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2066 if (flg) { 2067 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2068 } 2069 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2070 PetscFunctionReturn(0); 2071 } 2072 2073 #undef __FUNCT__ 2074 #define __FUNCT__ "MatCopy_MPIAIJ" 2075 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2076 { 2077 PetscErrorCode ierr; 2078 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2079 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2080 2081 PetscFunctionBegin; 2082 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2083 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2084 /* because of the column compression in the off-processor part of the matrix a->B, 2085 the number of columns in a->B and b->B may be different, hence we cannot call 2086 the MatCopy() directly on the two parts. 
If need be, we can provide a more 2087 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2088 then copying the submatrices */ 2089 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2090 } else { 2091 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2092 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2093 } 2094 PetscFunctionReturn(0); 2095 } 2096 2097 #undef __FUNCT__ 2098 #define __FUNCT__ "MatSetUp_MPIAIJ" 2099 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2100 { 2101 PetscErrorCode ierr; 2102 2103 PetscFunctionBegin; 2104 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2105 PetscFunctionReturn(0); 2106 } 2107 2108 #undef __FUNCT__ 2109 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2110 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2111 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2112 { 2113 PetscInt i,m=Y->rmap->N; 2114 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2115 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2116 const PetscInt *xi = x->i,*yi = y->i; 2117 2118 PetscFunctionBegin; 2119 /* Set the number of nonzeros in the new matrix */ 2120 for (i=0; i<m; i++) { 2121 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2122 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2123 nnz[i] = 0; 2124 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2125 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2126 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2127 nnz[i]++; 2128 } 2129 for (; k<nzy; k++) nnz[i]++; 2130 } 2131 PetscFunctionReturn(0); 2132 } 2133 2134 #undef __FUNCT__ 2135 #define __FUNCT__ "MatAXPY_MPIAIJ" 2136 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2137 { 2138 PetscErrorCode ierr; 2139 PetscInt i; 2140 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2141 PetscBLASInt bnz,one=1; 2142 Mat_SeqAIJ *x,*y; 2143 2144 PetscFunctionBegin; 2145 if (str == SAME_NONZERO_PATTERN) { 2146 PetscScalar alpha = a; 2147 x = (Mat_SeqAIJ*)xx->A->data; 2148 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2149 y = (Mat_SeqAIJ*)yy->A->data; 2150 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2151 x = (Mat_SeqAIJ*)xx->B->data; 2152 y = (Mat_SeqAIJ*)yy->B->data; 2153 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2154 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2155 } else if (str == SUBSET_NONZERO_PATTERN) { 2156 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2157 2158 x = (Mat_SeqAIJ*)xx->B->data; 2159 y = (Mat_SeqAIJ*)yy->B->data; 2160 if (y->xtoy && y->XtoY != xx->B) { 2161 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2162 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2163 } 2164 if (!y->xtoy) { /* get xtoy */ 2165 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2166 y->XtoY = xx->B; 2167 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2168 } 2169 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2170 } else { 2171 Mat B; 2172 PetscInt *nnz_d,*nnz_o; 2173 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2174 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2175 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2176 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2177 ierr = 
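/*
   DIFFERENT_NONZERO_PATTERN case of MatAXPY_MPIAIJ: a new matrix B is preallocated for
   the union of the sparsity patterns of Y and X.  nnz_d comes from the sequential routine
   applied to the two diagonal blocks (which share the local column numbering), while
   nnz_o is produced by MatAXPYGetPreallocation_MPIAIJ, which merges the off-diagonal rows
   by comparing global columns through the garray local-to-global maps.  B is then filled
   by MatAXPY_BasicWithPreallocation and swapped into Y by MatHeaderReplace.
*/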
MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2178 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2179 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2180 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2181 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2182 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2183 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2184 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2185 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2186 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2187 } 2188 PetscFunctionReturn(0); 2189 } 2190 2191 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2192 2193 #undef __FUNCT__ 2194 #define __FUNCT__ "MatConjugate_MPIAIJ" 2195 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2196 { 2197 #if defined(PETSC_USE_COMPLEX) 2198 PetscErrorCode ierr; 2199 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2200 2201 PetscFunctionBegin; 2202 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2203 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2204 #else 2205 PetscFunctionBegin; 2206 #endif 2207 PetscFunctionReturn(0); 2208 } 2209 2210 #undef __FUNCT__ 2211 #define __FUNCT__ "MatRealPart_MPIAIJ" 2212 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2213 { 2214 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2215 PetscErrorCode ierr; 2216 2217 PetscFunctionBegin; 2218 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2219 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2220 PetscFunctionReturn(0); 2221 } 2222 2223 #undef __FUNCT__ 2224 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2225 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2226 { 2227 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2228 PetscErrorCode ierr; 2229 2230 PetscFunctionBegin; 2231 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2232 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2233 PetscFunctionReturn(0); 2234 } 2235 2236 #if defined(PETSC_HAVE_PBGL) 2237 2238 #include <boost/parallel/mpi/bsp_process_group.hpp> 2239 #include <boost/graph/distributed/ilu_default_graph.hpp> 2240 #include <boost/graph/distributed/ilu_0_block.hpp> 2241 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2242 #include <boost/graph/distributed/petsc/interface.hpp> 2243 #include <boost/multi_array.hpp> 2244 #include <boost/parallel/distributed_property_map->hpp> 2245 2246 #undef __FUNCT__ 2247 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2248 /* 2249 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2250 */ 2251 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2252 { 2253 namespace petsc = boost::distributed::petsc; 2254 2255 namespace graph_dist = boost::graph::distributed; 2256 using boost::graph::distributed::ilu_default::process_group_type; 2257 using boost::graph::ilu_permuted; 2258 2259 PetscBool row_identity, col_identity; 2260 PetscContainer c; 2261 PetscInt m, n, M, N; 2262 PetscErrorCode ierr; 2263 2264 PetscFunctionBegin; 2265 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2266 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2267 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2268 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2269 2270 process_group_type pg; 2271 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2272 lgraph_type *lgraph_p = 
new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2273 lgraph_type& level_graph = *lgraph_p; 2274 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2275 2276 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2277 ilu_permuted(level_graph); 2278 2279 /* put together the new matrix */ 2280 ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr); 2281 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2282 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2283 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2284 ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr); 2285 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2286 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2287 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2288 2289 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c); 2290 ierr = PetscContainerSetPointer(c, lgraph_p); 2291 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2292 ierr = PetscContainerDestroy(&c); 2293 PetscFunctionReturn(0); 2294 } 2295 2296 #undef __FUNCT__ 2297 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2298 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2299 { 2300 PetscFunctionBegin; 2301 PetscFunctionReturn(0); 2302 } 2303 2304 #undef __FUNCT__ 2305 #define __FUNCT__ "MatSolve_MPIAIJ" 2306 /* 2307 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2308 */ 2309 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2310 { 2311 namespace graph_dist = boost::graph::distributed; 2312 2313 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2314 lgraph_type *lgraph_p; 2315 PetscContainer c; 2316 PetscErrorCode ierr; 2317 2318 PetscFunctionBegin; 2319 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr); 2320 ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr); 2321 ierr = VecCopy(b, x);CHKERRQ(ierr); 2322 2323 PetscScalar *array_x; 2324 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2325 PetscInt sx; 2326 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2327 2328 PetscScalar *array_b; 2329 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2330 PetscInt sb; 2331 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2332 2333 lgraph_type& level_graph = *lgraph_p; 2334 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2335 2336 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2337 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]); 2338 array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]); 2339 2340 typedef boost::iterator_property_map<array_ref_type::iterator, 2341 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2342 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)); 2343 gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2344 2345 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2346 PetscFunctionReturn(0); 2347 } 2348 #endif 2349 2350 #undef __FUNCT__ 2351 #define __FUNCT__ "MatDestroy_MatRedundant" 2352 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2353 { 2354 PetscErrorCode ierr; 2355 Mat_Redundant *redund; 2356 PetscInt i; 2357 PetscMPIInt size; 2358 2359 PetscFunctionBegin; 2360 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2361 if (size == 1) { 2362 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2363 redund = a->redundant; 2364 } else { 
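/* with more than one process in the communicator the Mat_Redundant context is stored in the parallel (MPIAIJ) header rather than in the sequential one */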
2365 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2366 redund = a->redundant; 2367 } 2368 if (redund){ 2369 if (redund->matseq) { /* via MatGetSubMatrices() */ 2370 ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr); 2371 ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr); 2372 ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr); 2373 ierr = PetscFree(redund->matseq);CHKERRQ(ierr); 2374 } else { 2375 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2376 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2377 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2378 for (i=0; i<redund->nrecvs; i++) { 2379 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2380 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2381 } 2382 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2383 } 2384 2385 if (redund->psubcomm) { 2386 ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr); 2387 } 2388 ierr = redund->Destroy(A);CHKERRQ(ierr); 2389 ierr = PetscFree(redund);CHKERRQ(ierr); 2390 } 2391 PetscFunctionReturn(0); 2392 } 2393 2394 #undef __FUNCT__ 2395 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced" 2396 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2397 { 2398 PetscMPIInt rank,size; 2399 MPI_Comm comm; 2400 PetscErrorCode ierr; 2401 PetscInt nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N; 2402 PetscMPIInt *send_rank= NULL,*recv_rank=NULL,subrank,subsize; 2403 PetscInt *rowrange = mat->rmap->range; 2404 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2405 Mat A = aij->A,B=aij->B,C=*matredundant; 2406 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2407 PetscScalar *sbuf_a; 2408 PetscInt nzlocal=a->nz+b->nz; 2409 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2410 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray; 2411 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2412 MatScalar *aworkA,*aworkB; 2413 PetscScalar *vals; 2414 PetscMPIInt tag1,tag2,tag3,imdex; 2415 MPI_Request *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL; 2416 MPI_Request *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL; 2417 MPI_Status recv_status,*send_status; 2418 PetscInt *sbuf_nz=NULL,*rbuf_nz=NULL,count; 2419 PetscInt **rbuf_j=NULL; 2420 PetscScalar **rbuf_a=NULL; 2421 Mat_Redundant *redund =NULL; 2422 2423 PetscFunctionBegin; 2424 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2425 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2426 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2427 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2428 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2429 2430 if (reuse == MAT_REUSE_MATRIX) { 2431 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2432 if (subsize == 1) { 2433 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2434 redund = c->redundant; 2435 } else { 2436 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2437 redund = c->redundant; 2438 } 2439 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong nzlocal"); 2440 2441 nsends = redund->nsends; 2442 nrecvs = redund->nrecvs; 2443 send_rank = redund->send_rank; 2444 recv_rank = redund->recv_rank; 2445 sbuf_nz = redund->sbuf_nz; 2446 rbuf_nz = redund->rbuf_nz; 2447 sbuf_j = redund->sbuf_j; 2448 sbuf_a = redund->sbuf_a; 2449 rbuf_j = redund->rbuf_j; 2450 rbuf_a = redund->rbuf_a; 2451 } 2452 2453 if (reuse == MAT_INITIAL_MATRIX) { 2454 PetscInt nleftover,np_subcomm; 2455 2456 /* get the destination processors' id send_rank, nsends and nrecvs */ 2457 ierr = PetscMalloc2(size,&send_rank,size,&recv_rank);CHKERRQ(ierr); 2458 2459 np_subcomm = size/nsubcomm; 2460 nleftover = size - nsubcomm*np_subcomm; 2461 2462 /* block of codes below is specific for INTERLACED */ 2463 /* ------------------------------------------------*/ 2464 nsends = 0; nrecvs = 0; 2465 for (i=0; i<size; i++) { 2466 if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */ 2467 send_rank[nsends++] = i; 2468 recv_rank[nrecvs++] = i; 2469 } 2470 } 2471 if (rank >= size - nleftover) { /* this proc is a leftover processor */ 2472 i = size-nleftover-1; 2473 j = 0; 2474 while (j < nsubcomm - nleftover) { 2475 send_rank[nsends++] = i; 2476 i--; j++; 2477 } 2478 } 2479 2480 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 2481 for (i=0; i<nleftover; i++) { 2482 recv_rank[nrecvs++] = size-nleftover+i; 2483 } 2484 } 2485 /*----------------------------------------------*/ 2486 2487 /* allocate sbuf_j, sbuf_a */ 2488 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2489 ierr = PetscMalloc1(i,&sbuf_j);CHKERRQ(ierr); 2490 ierr = PetscMalloc1((nzlocal+1),&sbuf_a);CHKERRQ(ierr); 2491 /* 2492 ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr); 2493 ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr); 2494 */ 2495 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2496 2497 /* copy mat's local entries into the buffers */ 2498 if (reuse == MAT_INITIAL_MATRIX) { 2499 rownz_max = 0; 2500 rptr = sbuf_j; 2501 cols = sbuf_j + rend-rstart + 1; 2502 vals = sbuf_a; 2503 rptr[0] = 0; 2504 for (i=0; i<rend-rstart; i++) { 2505 row = i + rstart; 2506 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2507 ncols = nzA + nzB; 2508 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2509 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2510 /* load the column indices for this row into cols */ 2511 lwrite = 0; 2512 for (l=0; l<nzB; l++) { 2513 if ((ctmp = bmap[cworkB[l]]) < cstart) { 2514 vals[lwrite] = aworkB[l]; 2515 cols[lwrite++] = ctmp; 2516 } 2517 } 2518 for (l=0; l<nzA; l++) { 2519 vals[lwrite] = aworkA[l]; 2520 cols[lwrite++] = cstart + cworkA[l]; 2521 } 2522 for (l=0; l<nzB; l++) { 2523 if ((ctmp = bmap[cworkB[l]]) >= cend) { 2524 vals[lwrite] = aworkB[l]; 2525 cols[lwrite++] = ctmp; 2526 } 2527 } 2528 vals += ncols; 2529 cols += ncols; 2530 rptr[i+1] = rptr[i] + ncols; 2531 if (rownz_max < ncols) rownz_max = ncols; 2532 } 2533 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2534 } else { /* only copy matrix values into sbuf_a */ 2535 rptr = sbuf_j; 2536 vals = sbuf_a; 2537 rptr[0] = 0; 2538 for (i=0; i<rend-rstart; i++) { 2539 row = i + rstart; 2540 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2541 ncols = nzA + nzB; 2542 cworkB = b->j + b->i[i]; 2543 aworkA = a->a + a->i[i]; 2544 aworkB = b->a + b->i[i]; 2545 lwrite = 0; 2546 for (l=0; 
l<nzB; l++) { 2547 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2548 } 2549 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2550 for (l=0; l<nzB; l++) { 2551 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2552 } 2553 vals += ncols; 2554 rptr[i+1] = rptr[i] + ncols; 2555 } 2556 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2557 2558 /* send nzlocal to others, and recv other's nzlocal */ 2559 /*--------------------------------------------------*/ 2560 if (reuse == MAT_INITIAL_MATRIX) { 2561 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr); 2562 2563 s_waits2 = s_waits3 + nsends; 2564 s_waits1 = s_waits2 + nsends; 2565 r_waits1 = s_waits1 + nsends; 2566 r_waits2 = r_waits1 + nrecvs; 2567 r_waits3 = r_waits2 + nrecvs; 2568 } else { 2569 ierr = PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr); 2570 2571 r_waits3 = s_waits3 + nsends; 2572 } 2573 2574 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2575 if (reuse == MAT_INITIAL_MATRIX) { 2576 /* get new tags to keep the communication clean */ 2577 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2578 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2579 ierr = PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);CHKERRQ(ierr); 2580 2581 /* post receives of other's nzlocal */ 2582 for (i=0; i<nrecvs; i++) { 2583 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2584 } 2585 /* send nzlocal to others */ 2586 for (i=0; i<nsends; i++) { 2587 sbuf_nz[i] = nzlocal; 2588 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2589 } 2590 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2591 count = nrecvs; 2592 while (count) { 2593 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2594 2595 recv_rank[imdex] = recv_status.MPI_SOURCE; 2596 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2597 ierr = PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);CHKERRQ(ierr); 2598 2599 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2600 2601 rbuf_nz[imdex] += i + 2; 2602 2603 ierr = PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);CHKERRQ(ierr); 2604 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2605 count--; 2606 } 2607 /* wait on sends of nzlocal */ 2608 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2609 /* send mat->i,j to others, and recv from other's */ 2610 /*------------------------------------------------*/ 2611 for (i=0; i<nsends; i++) { 2612 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2613 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2614 } 2615 /* wait on receives of mat->i,j */ 2616 /*------------------------------*/ 2617 count = nrecvs; 2618 while (count) { 2619 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2620 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2621 count--; 2622 } 2623 /* wait on sends of mat->i,j */ 2624 /*---------------------------*/ 2625 if (nsends) { 2626 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2627 } 2628 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2629 2630 /* post 
receives, send and receive mat->a */ 2631 /*----------------------------------------*/ 2632 for (imdex=0; imdex<nrecvs; imdex++) { 2633 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2634 } 2635 for (i=0; i<nsends; i++) { 2636 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2637 } 2638 count = nrecvs; 2639 while (count) { 2640 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2641 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2642 count--; 2643 } 2644 if (nsends) { 2645 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2646 } 2647 2648 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2649 2650 /* create redundant matrix */ 2651 /*-------------------------*/ 2652 if (reuse == MAT_INITIAL_MATRIX) { 2653 const PetscInt *range; 2654 PetscInt rstart_sub,rend_sub,mloc_sub; 2655 2656 /* compute rownz_max for preallocation */ 2657 for (imdex=0; imdex<nrecvs; imdex++) { 2658 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2659 rptr = rbuf_j[imdex]; 2660 for (i=0; i<j; i++) { 2661 ncols = rptr[i+1] - rptr[i]; 2662 if (rownz_max < ncols) rownz_max = ncols; 2663 } 2664 } 2665 2666 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2667 2668 /* get local size of redundant matrix 2669 - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED, works for other types, but may not efficient! */ 2670 ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr); 2671 rstart_sub = range[nsubcomm*subrank]; 2672 if (subrank+1 < subsize) { /* not the last proc in subcomm */ 2673 rend_sub = range[nsubcomm*(subrank+1)]; 2674 } else { 2675 rend_sub = mat->rmap->N; 2676 } 2677 mloc_sub = rend_sub - rstart_sub; 2678 2679 if (M == N) { 2680 ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2681 } else { /* non-square matrix */ 2682 ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr); 2683 } 2684 ierr = MatSetBlockSizesFromMats(C,mat,mat);CHKERRQ(ierr); 2685 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2686 ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr); 2687 ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr); 2688 } else { 2689 C = *matredundant; 2690 } 2691 2692 /* insert local matrix entries */ 2693 rptr = sbuf_j; 2694 cols = sbuf_j + rend-rstart + 1; 2695 vals = sbuf_a; 2696 for (i=0; i<rend-rstart; i++) { 2697 row = i + rstart; 2698 ncols = rptr[i+1] - rptr[i]; 2699 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2700 vals += ncols; 2701 cols += ncols; 2702 } 2703 /* insert received matrix entries */ 2704 for (imdex=0; imdex<nrecvs; imdex++) { 2705 rstart = rowrange[recv_rank[imdex]]; 2706 rend = rowrange[recv_rank[imdex]+1]; 2707 /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */ 2708 rptr = rbuf_j[imdex]; 2709 cols = rbuf_j[imdex] + rend-rstart + 1; 2710 vals = rbuf_a[imdex]; 2711 for (i=0; i<rend-rstart; i++) { 2712 row = i + rstart; 2713 ncols = rptr[i+1] - rptr[i]; 2714 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2715 vals += ncols; 2716 cols += ncols; 2717 } 2718 } 2719 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2720 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2721 2722 if (reuse == MAT_INITIAL_MATRIX) { 2723 *matredundant = C; 2724 2725 /* create a 
supporting struct and attach it to C for reuse */ 2726 ierr = PetscNewLog(C,&redund);CHKERRQ(ierr); 2727 if (subsize == 1) { 2728 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2729 c->redundant = redund; 2730 } else { 2731 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2732 c->redundant = redund; 2733 } 2734 2735 redund->nzlocal = nzlocal; 2736 redund->nsends = nsends; 2737 redund->nrecvs = nrecvs; 2738 redund->send_rank = send_rank; 2739 redund->recv_rank = recv_rank; 2740 redund->sbuf_nz = sbuf_nz; 2741 redund->rbuf_nz = rbuf_nz; 2742 redund->sbuf_j = sbuf_j; 2743 redund->sbuf_a = sbuf_a; 2744 redund->rbuf_j = rbuf_j; 2745 redund->rbuf_a = rbuf_a; 2746 redund->psubcomm = NULL; 2747 2748 redund->Destroy = C->ops->destroy; 2749 C->ops->destroy = MatDestroy_MatRedundant; 2750 } 2751 PetscFunctionReturn(0); 2752 } 2753 2754 #undef __FUNCT__ 2755 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2756 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2757 { 2758 PetscErrorCode ierr; 2759 MPI_Comm comm; 2760 PetscMPIInt size,subsize; 2761 PetscInt mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N; 2762 Mat_Redundant *redund=NULL; 2763 PetscSubcomm psubcomm=NULL; 2764 MPI_Comm subcomm_in=subcomm; 2765 Mat *matseq; 2766 IS isrow,iscol; 2767 2768 PetscFunctionBegin; 2769 if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */ 2770 if (reuse == MAT_INITIAL_MATRIX) { 2771 /* create psubcomm, then get subcomm */ 2772 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2773 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2774 if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must between 1 and %D",size); 2775 2776 ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr); 2777 ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr); 2778 ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr); 2779 ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr); 2780 subcomm = psubcomm->comm; 2781 } else { /* retrieve psubcomm and subcomm */ 2782 ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr); 2783 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2784 if (subsize == 1) { 2785 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2786 redund = c->redundant; 2787 } else { 2788 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2789 redund = c->redundant; 2790 } 2791 psubcomm = redund->psubcomm; 2792 } 2793 if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) { 2794 ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr); 2795 if (reuse == MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_MatRedundant() */ 2796 ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr); 2797 if (subsize == 1) { 2798 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2799 c->redundant->psubcomm = psubcomm; 2800 } else { 2801 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2802 c->redundant->psubcomm = psubcomm ; 2803 } 2804 } 2805 PetscFunctionReturn(0); 2806 } 2807 } 2808 2809 /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */ 2810 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2811 if (reuse == MAT_INITIAL_MATRIX) { 2812 /* create a local sequential matrix matseq[0] */ 2813 mloc_sub = PETSC_DECIDE; 2814 ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr); 2815 ierr = 
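/*
   The local row range of the sequential submatrix is found by splitting the M global rows
   across the subcommunicator and prefix-summing the local sizes: after the MPI_Scan below,
   rend holds the inclusive prefix sum and rstart = rend - mloc_sub.  For example
   (illustrative numbers), M = 10 rows on a 3-process subcomm split as 4,3,3 gives
   rend = 4,7,10 and rstart = 0,4,7 on the three processes.
*/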
MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr); 2816 rstart = rend - mloc_sub; 2817 ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr); 2818 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr); 2819 } else { /* reuse == MAT_REUSE_MATRIX */ 2820 if (subsize == 1) { 2821 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2822 redund = c->redundant; 2823 } else { 2824 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2825 redund = c->redundant; 2826 } 2827 2828 isrow = redund->isrow; 2829 iscol = redund->iscol; 2830 matseq = redund->matseq; 2831 } 2832 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr); 2833 ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr); 2834 2835 if (reuse == MAT_INITIAL_MATRIX) { 2836 /* create a supporting struct and attach it to C for reuse */ 2837 ierr = PetscNewLog(*matredundant,&redund);CHKERRQ(ierr); 2838 if (subsize == 1) { 2839 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2840 c->redundant = redund; 2841 } else { 2842 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2843 c->redundant = redund; 2844 } 2845 redund->isrow = isrow; 2846 redund->iscol = iscol; 2847 redund->matseq = matseq; 2848 redund->psubcomm = psubcomm; 2849 redund->Destroy = (*matredundant)->ops->destroy; 2850 (*matredundant)->ops->destroy = MatDestroy_MatRedundant; 2851 } 2852 PetscFunctionReturn(0); 2853 } 2854 2855 #undef __FUNCT__ 2856 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2857 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2858 { 2859 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2860 PetscErrorCode ierr; 2861 PetscInt i,*idxb = 0; 2862 PetscScalar *va,*vb; 2863 Vec vtmp; 2864 2865 PetscFunctionBegin; 2866 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2867 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2868 if (idx) { 2869 for (i=0; i<A->rmap->n; i++) { 2870 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2871 } 2872 } 2873 2874 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2875 if (idx) { 2876 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2877 } 2878 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2879 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2880 2881 for (i=0; i<A->rmap->n; i++) { 2882 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2883 va[i] = vb[i]; 2884 if (idx) idx[i] = a->garray[idxb[i]]; 2885 } 2886 } 2887 2888 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2889 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2890 ierr = PetscFree(idxb);CHKERRQ(ierr); 2891 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2892 PetscFunctionReturn(0); 2893 } 2894 2895 #undef __FUNCT__ 2896 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2897 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2898 { 2899 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2900 PetscErrorCode ierr; 2901 PetscInt i,*idxb = 0; 2902 PetscScalar *va,*vb; 2903 Vec vtmp; 2904 2905 PetscFunctionBegin; 2906 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2907 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2908 if (idx) { 2909 for (i=0; i<A->cmap->n; i++) { 2910 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2911 } 2912 } 2913 2914 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2915 if (idx) { 2916 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2917 } 2918 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2919 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2920 2921 for (i=0; i<A->rmap->n; i++) 
{ 2922 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2923 va[i] = vb[i]; 2924 if (idx) idx[i] = a->garray[idxb[i]]; 2925 } 2926 } 2927 2928 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2929 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2930 ierr = PetscFree(idxb);CHKERRQ(ierr); 2931 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2932 PetscFunctionReturn(0); 2933 } 2934 2935 #undef __FUNCT__ 2936 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2937 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2938 { 2939 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2940 PetscInt n = A->rmap->n; 2941 PetscInt cstart = A->cmap->rstart; 2942 PetscInt *cmap = mat->garray; 2943 PetscInt *diagIdx, *offdiagIdx; 2944 Vec diagV, offdiagV; 2945 PetscScalar *a, *diagA, *offdiagA; 2946 PetscInt r; 2947 PetscErrorCode ierr; 2948 2949 PetscFunctionBegin; 2950 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2951 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2952 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 2953 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2954 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2955 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2956 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2957 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2958 for (r = 0; r < n; ++r) { 2959 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2960 a[r] = diagA[r]; 2961 idx[r] = cstart + diagIdx[r]; 2962 } else { 2963 a[r] = offdiagA[r]; 2964 idx[r] = cmap[offdiagIdx[r]]; 2965 } 2966 } 2967 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2968 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2969 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2970 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2971 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2972 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2973 PetscFunctionReturn(0); 2974 } 2975 2976 #undef __FUNCT__ 2977 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2978 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2979 { 2980 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2981 PetscInt n = A->rmap->n; 2982 PetscInt cstart = A->cmap->rstart; 2983 PetscInt *cmap = mat->garray; 2984 PetscInt *diagIdx, *offdiagIdx; 2985 Vec diagV, offdiagV; 2986 PetscScalar *a, *diagA, *offdiagA; 2987 PetscInt r; 2988 PetscErrorCode ierr; 2989 2990 PetscFunctionBegin; 2991 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2992 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2993 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2994 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2995 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2996 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2997 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2998 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2999 for (r = 0; r < n; ++r) { 3000 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 3001 a[r] = diagA[r]; 3002 idx[r] = cstart + diagIdx[r]; 3003 } else { 3004 a[r] = offdiagA[r]; 3005 idx[r] = cmap[offdiagIdx[r]]; 3006 } 3007 } 3008 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3009 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3010 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3011 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3012 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3013 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3014 
PetscFunctionReturn(0); 3015 } 3016 3017 #undef __FUNCT__ 3018 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 3019 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 3020 { 3021 PetscErrorCode ierr; 3022 Mat *dummy; 3023 3024 PetscFunctionBegin; 3025 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 3026 *newmat = *dummy; 3027 ierr = PetscFree(dummy);CHKERRQ(ierr); 3028 PetscFunctionReturn(0); 3029 } 3030 3031 #undef __FUNCT__ 3032 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3033 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3034 { 3035 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3036 PetscErrorCode ierr; 3037 3038 PetscFunctionBegin; 3039 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3040 PetscFunctionReturn(0); 3041 } 3042 3043 #undef __FUNCT__ 3044 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3045 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3046 { 3047 PetscErrorCode ierr; 3048 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3049 3050 PetscFunctionBegin; 3051 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3052 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3053 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3054 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3055 PetscFunctionReturn(0); 3056 } 3057 3058 /* -------------------------------------------------------------------*/ 3059 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3060 MatGetRow_MPIAIJ, 3061 MatRestoreRow_MPIAIJ, 3062 MatMult_MPIAIJ, 3063 /* 4*/ MatMultAdd_MPIAIJ, 3064 MatMultTranspose_MPIAIJ, 3065 MatMultTransposeAdd_MPIAIJ, 3066 #if defined(PETSC_HAVE_PBGL) 3067 MatSolve_MPIAIJ, 3068 #else 3069 0, 3070 #endif 3071 0, 3072 0, 3073 /*10*/ 0, 3074 0, 3075 0, 3076 MatSOR_MPIAIJ, 3077 MatTranspose_MPIAIJ, 3078 /*15*/ MatGetInfo_MPIAIJ, 3079 MatEqual_MPIAIJ, 3080 MatGetDiagonal_MPIAIJ, 3081 MatDiagonalScale_MPIAIJ, 3082 MatNorm_MPIAIJ, 3083 /*20*/ MatAssemblyBegin_MPIAIJ, 3084 MatAssemblyEnd_MPIAIJ, 3085 MatSetOption_MPIAIJ, 3086 MatZeroEntries_MPIAIJ, 3087 /*24*/ MatZeroRows_MPIAIJ, 3088 0, 3089 #if defined(PETSC_HAVE_PBGL) 3090 0, 3091 #else 3092 0, 3093 #endif 3094 0, 3095 0, 3096 /*29*/ MatSetUp_MPIAIJ, 3097 #if defined(PETSC_HAVE_PBGL) 3098 0, 3099 #else 3100 0, 3101 #endif 3102 0, 3103 0, 3104 0, 3105 /*34*/ MatDuplicate_MPIAIJ, 3106 0, 3107 0, 3108 0, 3109 0, 3110 /*39*/ MatAXPY_MPIAIJ, 3111 MatGetSubMatrices_MPIAIJ, 3112 MatIncreaseOverlap_MPIAIJ, 3113 MatGetValues_MPIAIJ, 3114 MatCopy_MPIAIJ, 3115 /*44*/ MatGetRowMax_MPIAIJ, 3116 MatScale_MPIAIJ, 3117 0, 3118 0, 3119 MatZeroRowsColumns_MPIAIJ, 3120 /*49*/ MatSetRandom_MPIAIJ, 3121 0, 3122 0, 3123 0, 3124 0, 3125 /*54*/ MatFDColoringCreate_MPIXAIJ, 3126 0, 3127 MatSetUnfactored_MPIAIJ, 3128 MatPermute_MPIAIJ, 3129 0, 3130 /*59*/ MatGetSubMatrix_MPIAIJ, 3131 MatDestroy_MPIAIJ, 3132 MatView_MPIAIJ, 3133 0, 3134 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 3135 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 3136 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 3137 0, 3138 0, 3139 0, 3140 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3141 MatGetRowMinAbs_MPIAIJ, 3142 0, 3143 MatSetColoring_MPIAIJ, 3144 0, 3145 MatSetValuesAdifor_MPIAIJ, 3146 /*75*/ MatFDColoringApply_AIJ, 3147 0, 3148 0, 3149 0, 3150 MatFindZeroDiagonals_MPIAIJ, 3151 /*80*/ 0, 3152 0, 3153 0, 3154 /*83*/ MatLoad_MPIAIJ, 3155 0, 3156 0, 3157 0, 3158 0, 3159 0, 3160 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3161 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3162 
MatMatMultNumeric_MPIAIJ_MPIAIJ, 3163 MatPtAP_MPIAIJ_MPIAIJ, 3164 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3165 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 3166 0, 3167 0, 3168 0, 3169 0, 3170 /*99*/ 0, 3171 0, 3172 0, 3173 MatConjugate_MPIAIJ, 3174 0, 3175 /*104*/MatSetValuesRow_MPIAIJ, 3176 MatRealPart_MPIAIJ, 3177 MatImaginaryPart_MPIAIJ, 3178 0, 3179 0, 3180 /*109*/0, 3181 MatGetRedundantMatrix_MPIAIJ, 3182 MatGetRowMin_MPIAIJ, 3183 0, 3184 0, 3185 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3186 0, 3187 0, 3188 0, 3189 0, 3190 /*119*/0, 3191 0, 3192 0, 3193 0, 3194 MatGetMultiProcBlock_MPIAIJ, 3195 /*124*/MatFindNonzeroRows_MPIAIJ, 3196 MatGetColumnNorms_MPIAIJ, 3197 MatInvertBlockDiagonal_MPIAIJ, 3198 0, 3199 MatGetSubMatricesParallel_MPIAIJ, 3200 /*129*/0, 3201 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3202 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3203 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3204 0, 3205 /*134*/0, 3206 0, 3207 0, 3208 0, 3209 0, 3210 /*139*/0, 3211 0, 3212 0, 3213 MatFDColoringSetUp_MPIXAIJ 3214 }; 3215 3216 /* ----------------------------------------------------------------------------------------*/ 3217 3218 #undef __FUNCT__ 3219 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3220 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3221 { 3222 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3223 PetscErrorCode ierr; 3224 3225 PetscFunctionBegin; 3226 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3227 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3228 PetscFunctionReturn(0); 3229 } 3230 3231 #undef __FUNCT__ 3232 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3233 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3234 { 3235 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3236 PetscErrorCode ierr; 3237 3238 PetscFunctionBegin; 3239 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3240 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3241 PetscFunctionReturn(0); 3242 } 3243 3244 #undef __FUNCT__ 3245 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3246 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3247 { 3248 Mat_MPIAIJ *b; 3249 PetscErrorCode ierr; 3250 3251 PetscFunctionBegin; 3252 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3253 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3254 b = (Mat_MPIAIJ*)B->data; 3255 3256 if (!B->preallocated) { 3257 /* Explicitly create 2 MATSEQAIJ matrices. 
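b->A is the "diagonal" block (local rows by local columns) and b->B is the "off-diagonal" block, created here with the full global number of columns; its column space is compacted later, during assembly.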
*/ 3258 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3259 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3260 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 3261 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3262 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 3263 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3264 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3265 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 3266 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3267 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 3268 } 3269 3270 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3271 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3272 B->preallocated = PETSC_TRUE; 3273 PetscFunctionReturn(0); 3274 } 3275 3276 #undef __FUNCT__ 3277 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3278 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3279 { 3280 Mat mat; 3281 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3282 PetscErrorCode ierr; 3283 3284 PetscFunctionBegin; 3285 *newmat = 0; 3286 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3287 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3288 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 3289 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3290 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3291 a = (Mat_MPIAIJ*)mat->data; 3292 3293 mat->factortype = matin->factortype; 3294 mat->assembled = PETSC_TRUE; 3295 mat->insertmode = NOT_SET_VALUES; 3296 mat->preallocated = PETSC_TRUE; 3297 3298 a->size = oldmat->size; 3299 a->rank = oldmat->rank; 3300 a->donotstash = oldmat->donotstash; 3301 a->roworiented = oldmat->roworiented; 3302 a->rowindices = 0; 3303 a->rowvalues = 0; 3304 a->getrowactive = PETSC_FALSE; 3305 3306 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3307 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3308 3309 if (oldmat->colmap) { 3310 #if defined(PETSC_USE_CTABLE) 3311 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3312 #else 3313 ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr); 3314 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3315 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3316 #endif 3317 } else a->colmap = 0; 3318 if (oldmat->garray) { 3319 PetscInt len; 3320 len = oldmat->B->cmap->n; 3321 ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr); 3322 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3323 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3324 } else a->garray = 0; 3325 3326 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3327 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 3328 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3329 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 3330 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3331 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3332 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3333 ierr = 
PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3334 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3335 *newmat = mat; 3336 PetscFunctionReturn(0); 3337 } 3338 3339 3340 3341 #undef __FUNCT__ 3342 #define __FUNCT__ "MatLoad_MPIAIJ" 3343 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3344 { 3345 PetscScalar *vals,*svals; 3346 MPI_Comm comm; 3347 PetscErrorCode ierr; 3348 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3349 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3350 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3351 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 3352 PetscInt cend,cstart,n,*rowners,sizesset=1; 3353 int fd; 3354 PetscInt bs = 1; 3355 3356 PetscFunctionBegin; 3357 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3358 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3359 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3360 if (!rank) { 3361 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3362 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3363 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3364 } 3365 3366 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3367 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3368 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3369 3370 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3371 3372 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3373 M = header[1]; N = header[2]; 3374 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3375 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3376 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3377 3378 /* If global sizes are set, check if they are consistent with that given in the file */ 3379 if (sizesset) { 3380 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3381 } 3382 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3383 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3384 3385 /* determine ownership of all (block) rows */ 3386 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3387 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3388 else m = newMat->rmap->n; /* Set by user */ 3389 3390 ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr); 3391 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3392 3393 /* First process needs enough room for process with most rows */ 3394 if (!rank) { 3395 mmax = rowners[1]; 3396 for (i=2; i<=size; i++) { 3397 mmax = PetscMax(mmax, rowners[i]); 3398 } 3399 } else mmax = -1; /* unused, but compilers complain */ 3400 3401 rowners[0] = 0; 3402 for (i=2; i<=size; i++) { 3403 rowners[i] += rowners[i-1]; 3404 } 3405 rstart = rowners[rank]; 3406 rend = rowners[rank+1]; 3407 3408 /* distribute row lengths to all processors */ 3409 ierr = 
PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 3410 if (!rank) { 3411 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3412 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 3413 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 3414 for (j=0; j<m; j++) { 3415 procsnz[0] += ourlens[j]; 3416 } 3417 for (i=1; i<size; i++) { 3418 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3419 /* calculate the number of nonzeros on each processor */ 3420 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3421 procsnz[i] += rowlengths[j]; 3422 } 3423 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3424 } 3425 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3426 } else { 3427 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3428 } 3429 3430 if (!rank) { 3431 /* determine max buffer needed and allocate it */ 3432 maxnz = 0; 3433 for (i=0; i<size; i++) { 3434 maxnz = PetscMax(maxnz,procsnz[i]); 3435 } 3436 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 3437 3438 /* read in my part of the matrix column indices */ 3439 nz = procsnz[0]; 3440 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3441 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3442 3443 /* read in every one elses and ship off */ 3444 for (i=1; i<size; i++) { 3445 nz = procsnz[i]; 3446 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3447 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3448 } 3449 ierr = PetscFree(cols);CHKERRQ(ierr); 3450 } else { 3451 /* determine buffer space needed for message */ 3452 nz = 0; 3453 for (i=0; i<m; i++) { 3454 nz += ourlens[i]; 3455 } 3456 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3457 3458 /* receive message of column indices*/ 3459 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3460 } 3461 3462 /* determine column ownership if matrix is not square */ 3463 if (N != M) { 3464 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3465 else n = newMat->cmap->n; 3466 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3467 cstart = cend - n; 3468 } else { 3469 cstart = rstart; 3470 cend = rend; 3471 n = cend - cstart; 3472 } 3473 3474 /* loop over local rows, determining number of off diagonal entries */ 3475 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3476 jj = 0; 3477 for (i=0; i<m; i++) { 3478 for (j=0; j<ourlens[i]; j++) { 3479 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3480 jj++; 3481 } 3482 } 3483 3484 for (i=0; i<m; i++) { 3485 ourlens[i] -= offlens[i]; 3486 } 3487 if (!sizesset) { 3488 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3489 } 3490 3491 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3492 3493 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3494 3495 for (i=0; i<m; i++) { 3496 ourlens[i] += offlens[i]; 3497 } 3498 3499 if (!rank) { 3500 ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr); 3501 3502 /* read in my part of the matrix numerical values */ 3503 nz = procsnz[0]; 3504 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3505 3506 /* insert into matrix */ 3507 jj = rstart; 3508 smycols = mycols; 3509 svals = vals; 3510 for (i=0; i<m; i++) { 3511 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3512 smycols += ourlens[i]; 3513 svals += ourlens[i]; 3514 jj++; 3515 } 3516 3517 /* read in other processors and ship out */ 3518 for (i=1; i<size; i++) { 3519 nz = 
procsnz[i]; 3520 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3521 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3522 } 3523 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3524 } else { 3525 /* receive numeric values */ 3526 ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr); 3527 3528 /* receive message of values*/ 3529 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3530 3531 /* insert into matrix */ 3532 jj = rstart; 3533 smycols = mycols; 3534 svals = vals; 3535 for (i=0; i<m; i++) { 3536 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3537 smycols += ourlens[i]; 3538 svals += ourlens[i]; 3539 jj++; 3540 } 3541 } 3542 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3543 ierr = PetscFree(vals);CHKERRQ(ierr); 3544 ierr = PetscFree(mycols);CHKERRQ(ierr); 3545 ierr = PetscFree(rowners);CHKERRQ(ierr); 3546 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3547 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3548 PetscFunctionReturn(0); 3549 } 3550 3551 #undef __FUNCT__ 3552 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3553 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3554 { 3555 PetscErrorCode ierr; 3556 IS iscol_local; 3557 PetscInt csize; 3558 3559 PetscFunctionBegin; 3560 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3561 if (call == MAT_REUSE_MATRIX) { 3562 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3563 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3564 } else { 3565 PetscInt cbs; 3566 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3567 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3568 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3569 } 3570 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3571 if (call == MAT_INITIAL_MATRIX) { 3572 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3573 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3574 } 3575 PetscFunctionReturn(0); 3576 } 3577 3578 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3579 #undef __FUNCT__ 3580 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3581 /* 3582 Not great since it makes two copies of the submatrix, first an SeqAIJ 3583 in local and then by concatenating the local matrices the end result. 3584 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3585 3586 Note: This requires a sequential iscol with all indices. 
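csize is the requested number of columns of the result stored on this process; PETSC_DECIDE lets the routine choose (matching the row layout when the result is square).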
3587 */ 3588 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3589 { 3590 PetscErrorCode ierr; 3591 PetscMPIInt rank,size; 3592 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3593 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3594 PetscBool allcolumns, colflag; 3595 Mat M,Mreuse; 3596 MatScalar *vwork,*aa; 3597 MPI_Comm comm; 3598 Mat_SeqAIJ *aij; 3599 3600 PetscFunctionBegin; 3601 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3602 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3603 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3604 3605 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3606 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3607 if (colflag && ncol == mat->cmap->N) { 3608 allcolumns = PETSC_TRUE; 3609 } else { 3610 allcolumns = PETSC_FALSE; 3611 } 3612 if (call == MAT_REUSE_MATRIX) { 3613 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3614 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3615 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3616 } else { 3617 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3618 } 3619 3620 /* 3621 m - number of local rows 3622 n - number of columns (same on all processors) 3623 rstart - first row in new global matrix generated 3624 */ 3625 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3626 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3627 if (call == MAT_INITIAL_MATRIX) { 3628 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3629 ii = aij->i; 3630 jj = aij->j; 3631 3632 /* 3633 Determine the number of non-zeros in the diagonal and off-diagonal 3634 portions of the matrix in order to do correct preallocation 3635 */ 3636 3637 /* first get start and end of "diagonal" columns */ 3638 if (csize == PETSC_DECIDE) { 3639 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3640 if (mglobal == n) { /* square matrix */ 3641 nlocal = m; 3642 } else { 3643 nlocal = n/size + ((n % size) > rank); 3644 } 3645 } else { 3646 nlocal = csize; 3647 } 3648 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3649 rstart = rend - nlocal; 3650 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3651 3652 /* next, compute all the lengths */ 3653 ierr = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr); 3654 olens = dlens + m; 3655 for (i=0; i<m; i++) { 3656 jend = ii[i+1] - ii[i]; 3657 olen = 0; 3658 dlen = 0; 3659 for (j=0; j<jend; j++) { 3660 if (*jj < rstart || *jj >= rend) olen++; 3661 else dlen++; 3662 jj++; 3663 } 3664 olens[i] = olen; 3665 dlens[i] = dlen; 3666 } 3667 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3668 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3669 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3670 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3671 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3672 ierr = PetscFree(dlens);CHKERRQ(ierr); 3673 } else { 3674 PetscInt ml,nl; 3675 3676 M = *newmat; 3677 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3678 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3679 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3680 /* 3681 The next 
two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3682 rather than the slower MatSetValues(). 3683 */ 3684 M->was_assembled = PETSC_TRUE; 3685 M->assembled = PETSC_FALSE; 3686 } 3687 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3688 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3689 ii = aij->i; 3690 jj = aij->j; 3691 aa = aij->a; 3692 for (i=0; i<m; i++) { 3693 row = rstart + i; 3694 nz = ii[i+1] - ii[i]; 3695 cwork = jj; jj += nz; 3696 vwork = aa; aa += nz; 3697 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3698 } 3699 3700 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3701 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3702 *newmat = M; 3703 3704 /* save submatrix used in processor for next request */ 3705 if (call == MAT_INITIAL_MATRIX) { 3706 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3707 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3708 } 3709 PetscFunctionReturn(0); 3710 } 3711 3712 #undef __FUNCT__ 3713 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3714 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3715 { 3716 PetscInt m,cstart, cend,j,nnz,i,d; 3717 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3718 const PetscInt *JJ; 3719 PetscScalar *values; 3720 PetscErrorCode ierr; 3721 3722 PetscFunctionBegin; 3723 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3724 3725 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3726 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3727 m = B->rmap->n; 3728 cstart = B->cmap->rstart; 3729 cend = B->cmap->rend; 3730 rstart = B->rmap->rstart; 3731 3732 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3733 3734 #if defined(PETSC_USE_DEBUG) 3735 for (i=0; i<m; i++) { 3736 nnz = Ii[i+1]- Ii[i]; 3737 JJ = J + Ii[i]; 3738 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3739 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i); 3740 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3741 } 3742 #endif 3743 3744 for (i=0; i<m; i++) { 3745 nnz = Ii[i+1]- Ii[i]; 3746 JJ = J + Ii[i]; 3747 nnz_max = PetscMax(nnz_max,nnz); 3748 d = 0; 3749 for (j=0; j<nnz; j++) { 3750 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3751 } 3752 d_nnz[i] = d; 3753 o_nnz[i] = nnz - d; 3754 } 3755 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3756 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3757 3758 if (v) values = (PetscScalar*)v; 3759 else { 3760 ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr); 3761 } 3762 3763 for (i=0; i<m; i++) { 3764 ii = i + rstart; 3765 nnz = Ii[i+1]- Ii[i]; 3766 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3767 } 3768 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3769 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3770 3771 if (!v) { 3772 ierr = PetscFree(values);CHKERRQ(ierr); 3773 } 3774 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3775 PetscFunctionReturn(0); 3776 } 3777 3778 #undef __FUNCT__ 3779 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3780 /*@ 3781 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3782 (the default parallel PETSc format). 3783 3784 Collective on MPI_Comm 3785 3786 Input Parameters: 3787 + B - the matrix 3788 . i - the indices into j for the start of each local row (starts with zero) 3789 . j - the column indices for each local row (starts with zero) 3790 - v - optional values in the matrix 3791 3792 Level: developer 3793 3794 Notes: 3795 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3796 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3797 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3798 3799 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3800 3801 The format which is used for the sparse matrix input, is equivalent to a 3802 row-major ordering.. i.e for the following matrix, the input data expected is 3803 as shown: 3804 3805 1 0 0 3806 2 0 3 P0 3807 ------- 3808 4 5 6 P1 3809 3810 Process0 [P0]: rows_owned=[0,1] 3811 i = {0,1,3} [size = nrow+1 = 2+1] 3812 j = {0,0,2} [size = nz = 6] 3813 v = {1,2,3} [size = nz = 6] 3814 3815 Process1 [P1]: rows_owned=[2] 3816 i = {0,3} [size = nrow+1 = 1+1] 3817 j = {0,1,2} [size = nz = 6] 3818 v = {4,5,6} [size = nz = 6] 3819 3820 .keywords: matrix, aij, compressed row, sparse, parallel 3821 3822 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3823 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3824 @*/ 3825 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3826 { 3827 PetscErrorCode ierr; 3828 3829 PetscFunctionBegin; 3830 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3831 PetscFunctionReturn(0); 3832 } 3833 3834 #undef __FUNCT__ 3835 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3836 /*@C 3837 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3838 (the default parallel PETSc format). For good matrix assembly performance 3839 the user should preallocate the matrix storage by setting the parameters 3840 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3841 performance can be increased by more than a factor of 50. 3842 3843 Collective on MPI_Comm 3844 3845 Input Parameters: 3846 + A - the matrix 3847 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3848 (same value is used for all local rows) 3849 . d_nnz - array containing the number of nonzeros in the various rows of the 3850 DIAGONAL portion of the local submatrix (possibly different for each row) 3851 or NULL, if d_nz is used to specify the nonzero structure. 3852 The size of this array is equal to the number of local rows, i.e 'm'. 
3853 For matrices that will be factored, you must leave room for (and set) 3854 the diagonal entry even if it is zero. 3855 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3856 submatrix (same value is used for all local rows). 3857 - o_nnz - array containing the number of nonzeros in the various rows of the 3858 OFF-DIAGONAL portion of the local submatrix (possibly different for 3859 each row) or NULL, if o_nz is used to specify the nonzero 3860 structure. The size of this array is equal to the number 3861 of local rows, i.e 'm'. 3862 3863 If the *_nnz parameter is given then the *_nz parameter is ignored 3864 3865 The AIJ format (also called the Yale sparse matrix format or 3866 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3867 storage. The stored row and column indices begin with zero. 3868 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3869 3870 The parallel matrix is partitioned such that the first m0 rows belong to 3871 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3872 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3873 3874 The DIAGONAL portion of the local submatrix of a processor can be defined 3875 as the submatrix which is obtained by extraction the part corresponding to 3876 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3877 first row that belongs to the processor, r2 is the last row belonging to 3878 the this processor, and c1-c2 is range of indices of the local part of a 3879 vector suitable for applying the matrix to. This is an mxn matrix. In the 3880 common case of a square matrix, the row and column ranges are the same and 3881 the DIAGONAL part is also square. The remaining portion of the local 3882 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3883 3884 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3885 3886 You can call MatGetInfo() to get information on how effective the preallocation was; 3887 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3888 You can also run with the option -info and look for messages with the string 3889 malloc in them to see if additional memory allocation was needed. 3890 3891 Example usage: 3892 3893 Consider the following 8x8 matrix with 34 non-zero values, that is 3894 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3895 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3896 as follows: 3897 3898 .vb 3899 1 2 0 | 0 3 0 | 0 4 3900 Proc0 0 5 6 | 7 0 0 | 8 0 3901 9 0 10 | 11 0 0 | 12 0 3902 ------------------------------------- 3903 13 0 14 | 15 16 17 | 0 0 3904 Proc1 0 18 0 | 19 20 21 | 0 0 3905 0 0 0 | 22 23 0 | 24 0 3906 ------------------------------------- 3907 Proc2 25 26 27 | 0 0 28 | 29 0 3908 30 0 0 | 31 32 33 | 0 34 3909 .ve 3910 3911 This can be represented as a collection of submatrices as: 3912 3913 .vb 3914 A B C 3915 D E F 3916 G H I 3917 .ve 3918 3919 Where the submatrices A,B,C are owned by proc0, D,E,F are 3920 owned by proc1, G,H,I are owned by proc2. 3921 3922 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3923 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3924 The 'M','N' parameters are 8,8, and have the same values on all procs. 3925 3926 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3927 submatrices [A], [E], [I] respectively. 
The OFF-DIAGONAL submatrices 3928 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3929 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 3930 part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ 3931 matrix and [DF] as another SeqAIJ matrix. 3932 3933 When d_nz, o_nz parameters are specified, d_nz storage elements are 3934 allocated for every row of the local diagonal submatrix, and o_nz 3935 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3936 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 3937 row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively. 3938 In this case, the values of d_nz,o_nz are: 3939 .vb 3940 proc0 : dnz = 2, o_nz = 2 3941 proc1 : dnz = 3, o_nz = 2 3942 proc2 : dnz = 1, o_nz = 4 3943 .ve 3944 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3945 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3946 for proc2, i.e. we are using 12+15+10=37 storage locations to store 3947 34 values. 3948 3949 When d_nnz, o_nnz parameters are specified, the storage is specified 3950 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3951 In the above case the values for d_nnz,o_nnz are: 3952 .vb 3953 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3954 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3955 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3956 .ve 3957 Here the space allocated is the sum of all the above values, i.e. 34, and 3958 hence pre-allocation is perfect. 3959 3960 Level: intermediate 3961 3962 .keywords: matrix, aij, compressed row, sparse, parallel 3963 3964 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 3965 MPIAIJ, MatGetInfo(), PetscSplitOwnership() 3966 @*/ 3967 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3968 { 3969 PetscErrorCode ierr; 3970 3971 PetscFunctionBegin; 3972 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3973 PetscValidType(B,1); 3974 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3975 PetscFunctionReturn(0); 3976 } 3977 3978 #undef __FUNCT__ 3979 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3980 /*@ 3981 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows in standard 3982 CSR format. 3983 3984 Collective on MPI_Comm 3985 3986 Input Parameters: 3987 + comm - MPI communicator 3988 . m - number of local rows (Cannot be PETSC_DECIDE) 3989 . n - This value should be the same as the local size used in creating the 3990 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3991 calculated if N is given) For square matrices n is almost always m. 3992 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3993 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3994 . i - row indices 3995 . j - column indices 3996 - a - matrix values 3997 3998 Output Parameter: 3999 . mat - the matrix 4000 4001 Level: intermediate 4002 4003 Notes: 4004 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4005 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4006 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
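   As a rough calling sketch (the variable names and the communicator choice here are only illustrative), process 0 in the example below could build its share of that matrix with:
.vb
   Mat         A;
   PetscInt    m   = 2;              /* rows owned by this process           */
   PetscInt    i[] = {0,1,3};        /* local row pointers, starting at 0    */
   PetscInt    j[] = {0,0,2};        /* global column indices, starting at 0 */
   PetscScalar a[] = {1,2,3};        /* one value per entry of j             */
   ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,m,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,a,&A);CHKERRQ(ierr);
.ve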
4007 4008 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4009 4010 The format which is used for the sparse matrix input, is equivalent to a 4011 row-major ordering.. i.e for the following matrix, the input data expected is 4012 as shown: 4013 4014 1 0 0 4015 2 0 3 P0 4016 ------- 4017 4 5 6 P1 4018 4019 Process0 [P0]: rows_owned=[0,1] 4020 i = {0,1,3} [size = nrow+1 = 2+1] 4021 j = {0,0,2} [size = nz = 6] 4022 v = {1,2,3} [size = nz = 6] 4023 4024 Process1 [P1]: rows_owned=[2] 4025 i = {0,3} [size = nrow+1 = 1+1] 4026 j = {0,1,2} [size = nz = 6] 4027 v = {4,5,6} [size = nz = 6] 4028 4029 .keywords: matrix, aij, compressed row, sparse, parallel 4030 4031 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4032 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4033 @*/ 4034 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4035 { 4036 PetscErrorCode ierr; 4037 4038 PetscFunctionBegin; 4039 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4040 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4041 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4042 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4043 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4044 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4045 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4046 PetscFunctionReturn(0); 4047 } 4048 4049 #undef __FUNCT__ 4050 #define __FUNCT__ "MatCreateAIJ" 4051 /*@C 4052 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4053 (the default parallel PETSc format). For good matrix assembly performance 4054 the user should preallocate the matrix storage by setting the parameters 4055 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4056 performance can be increased by more than a factor of 50. 4057 4058 Collective on MPI_Comm 4059 4060 Input Parameters: 4061 + comm - MPI communicator 4062 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4063 This value should be the same as the local size used in creating the 4064 y vector for the matrix-vector product y = Ax. 4065 . n - This value should be the same as the local size used in creating the 4066 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4067 calculated if N is given) For square matrices n is almost always m. 4068 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4069 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4070 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4071 (same value is used for all local rows) 4072 . d_nnz - array containing the number of nonzeros in the various rows of the 4073 DIAGONAL portion of the local submatrix (possibly different for each row) 4074 or NULL, if d_nz is used to specify the nonzero structure. 4075 The size of this array is equal to the number of local rows, i.e 'm'. 4076 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4077 submatrix (same value is used for all local rows). 
4078 - o_nnz - array containing the number of nonzeros in the various rows of the 4079 OFF-DIAGONAL portion of the local submatrix (possibly different for 4080 each row) or NULL, if o_nz is used to specify the nonzero 4081 structure. The size of this array is equal to the number 4082 of local rows, i.e 'm'. 4083 4084 Output Parameter: 4085 . A - the matrix 4086 4087 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4088 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4089 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4090 4091 Notes: 4092 If the *_nnz parameter is given then the *_nz parameter is ignored 4093 4094 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4095 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4096 storage requirements for this matrix. 4097 4098 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4099 processor than it must be used on all processors that share the object for 4100 that argument. 4101 4102 The user MUST specify either the local or global matrix dimensions 4103 (possibly both). 4104 4105 The parallel matrix is partitioned across processors such that the 4106 first m0 rows belong to process 0, the next m1 rows belong to 4107 process 1, the next m2 rows belong to process 2 etc.. where 4108 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4109 values corresponding to [m x N] submatrix. 4110 4111 The columns are logically partitioned with the n0 columns belonging 4112 to 0th partition, the next n1 columns belonging to the next 4113 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 4114 4115 The DIAGONAL portion of the local submatrix on any given processor 4116 is the submatrix corresponding to the rows and columns m,n 4117 corresponding to the given processor. i.e diagonal matrix on 4118 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4119 etc. The remaining portion of the local submatrix [m x (N-n)] 4120 constitute the OFF-DIAGONAL portion. The example below better 4121 illustrates this concept. 4122 4123 For a square global matrix we define each processor's diagonal portion 4124 to be its local rows and the corresponding columns (a square submatrix); 4125 each processor's off-diagonal portion encompasses the remainder of the 4126 local matrix (a rectangular submatrix). 4127 4128 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4129 4130 When calling this routine with a single process communicator, a matrix of 4131 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4132 type of communicator, use the construction mechanism: 4133 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4134 4135 By default, this format uses inodes (identical nodes) when possible. 4136 We search for consecutive rows with the same nonzero structure, thereby 4137 reusing matrix information to achieve increased efficiency. 4138 4139 Options Database Keys: 4140 + -mat_no_inode - Do not use inodes 4141 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4142 - -mat_aij_oneindex - Internally use indexing starting at 1 4143 rather than 0. Note that when calling MatSetValues(), 4144 the user still MUST index entries starting at 0! 4145 4146 4147 Example usage: 4148 4149 Consider the following 8x8 matrix with 34 non-zero values, that is 4150 assembled across 3 processors. 
Let's assume that proc0 owns 3 rows, 4151 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4152 as follows: 4153 4154 .vb 4155 1 2 0 | 0 3 0 | 0 4 4156 Proc0 0 5 6 | 7 0 0 | 8 0 4157 9 0 10 | 11 0 0 | 12 0 4158 ------------------------------------- 4159 13 0 14 | 15 16 17 | 0 0 4160 Proc1 0 18 0 | 19 20 21 | 0 0 4161 0 0 0 | 22 23 0 | 24 0 4162 ------------------------------------- 4163 Proc2 25 26 27 | 0 0 28 | 29 0 4164 30 0 0 | 31 32 33 | 0 34 4165 .ve 4166 4167 This can be represented as a collection of submatrices as: 4168 4169 .vb 4170 A B C 4171 D E F 4172 G H I 4173 .ve 4174 4175 Where the submatrices A,B,C are owned by proc0, D,E,F are 4176 owned by proc1, G,H,I are owned by proc2. 4177 4178 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4179 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4180 The 'M','N' parameters are 8,8, and have the same values on all procs. 4181 4182 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4183 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4184 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4185 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 4186 part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ 4187 matrix and [DF] as another SeqAIJ matrix. 4188 4189 When d_nz, o_nz parameters are specified, d_nz storage elements are 4190 allocated for every row of the local diagonal submatrix, and o_nz 4191 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4192 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 4193 row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively. 4194 In this case, the values of d_nz,o_nz are: 4195 .vb 4196 proc0 : dnz = 2, o_nz = 2 4197 proc1 : dnz = 3, o_nz = 2 4198 proc2 : dnz = 1, o_nz = 4 4199 .ve 4200 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4201 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4202 for proc2, i.e. we are using 12+15+10=37 storage locations to store 4203 34 values. 4204 4205 When d_nnz, o_nnz parameters are specified, the storage is specified 4206 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4207 In the above case the values for d_nnz,o_nnz are: 4208 .vb 4209 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4210 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4211 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4212 .ve 4213 Here the space allocated is the sum of all the above values, i.e. 34, and 4214 hence pre-allocation is perfect.
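   For concreteness, a sketch of the call proc1 in this example might make (each process would pass its own local sizes and per-row counts) is:
.vb
   Mat      A;
   PetscInt d_nnz[3] = {3,3,2}, o_nnz[3] = {2,1,1};   /* proc1's per-row counts from the table above */
   ierr = MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve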
4215 4216 Level: intermediate 4217 4218 .keywords: matrix, aij, compressed row, sparse, parallel 4219 4220 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4221 MPIAIJ, MatCreateMPIAIJWithArrays() 4222 @*/ 4223 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4224 { 4225 PetscErrorCode ierr; 4226 PetscMPIInt size; 4227 4228 PetscFunctionBegin; 4229 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4230 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4231 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4232 if (size > 1) { 4233 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4234 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4235 } else { 4236 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4237 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4238 } 4239 PetscFunctionReturn(0); 4240 } 4241 4242 #undef __FUNCT__ 4243 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4244 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4245 { 4246 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4247 4248 PetscFunctionBegin; 4249 *Ad = a->A; 4250 *Ao = a->B; 4251 *colmap = a->garray; 4252 PetscFunctionReturn(0); 4253 } 4254 4255 #undef __FUNCT__ 4256 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4257 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4258 { 4259 PetscErrorCode ierr; 4260 PetscInt i; 4261 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4262 4263 PetscFunctionBegin; 4264 if (coloring->ctype == IS_COLORING_GLOBAL) { 4265 ISColoringValue *allcolors,*colors; 4266 ISColoring ocoloring; 4267 4268 /* set coloring for diagonal portion */ 4269 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4270 4271 /* set coloring for off-diagonal portion */ 4272 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 4273 ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr); 4274 for (i=0; i<a->B->cmap->n; i++) { 4275 colors[i] = allcolors[a->garray[i]]; 4276 } 4277 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4278 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4279 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4280 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4281 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4282 ISColoringValue *colors; 4283 PetscInt *larray; 4284 ISColoring ocoloring; 4285 4286 /* set coloring for diagonal portion */ 4287 ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr); 4288 for (i=0; i<a->A->cmap->n; i++) { 4289 larray[i] = i + A->cmap->rstart; 4290 } 4291 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 4292 ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr); 4293 for (i=0; i<a->A->cmap->n; i++) { 4294 colors[i] = coloring->colors[larray[i]]; 4295 } 4296 ierr = PetscFree(larray);CHKERRQ(ierr); 4297 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4298 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4299 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4300 4301 /* set coloring for off-diagonal portion */ 4302 ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr); 4303 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 4304 ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr); 4305 for (i=0; i<a->B->cmap->n; i++) { 4306 colors[i] = coloring->colors[larray[i]]; 4307 } 4308 ierr = PetscFree(larray);CHKERRQ(ierr); 4309 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4310 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4311 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4312 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4313 PetscFunctionReturn(0); 4314 } 4315 4316 #undef __FUNCT__ 4317 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4318 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4319 { 4320 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4321 PetscErrorCode ierr; 4322 4323 PetscFunctionBegin; 4324 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4325 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4326 PetscFunctionReturn(0); 4327 } 4328 4329 #undef __FUNCT__ 4330 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4331 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4332 { 4333 PetscErrorCode ierr; 4334 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4335 PetscInt *indx; 4336 4337 PetscFunctionBegin; 4338 /* This routine will ONLY return MPIAIJ type matrix */ 4339 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4340 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4341 if (n == PETSC_DECIDE) { 4342 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4343 } 4344 /* Check sum(n) = N */ 4345 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4346 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4347 4348 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4349 rstart -= m; 4350 4351 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4352 for (i=0; i<m; i++) { 4353 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4354 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4355 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4356 } 4357 4358 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4359 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4360 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4361 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4362 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4363 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4364 PetscFunctionReturn(0); 4365 } 4366 4367 #undef __FUNCT__ 4368 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4369 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4370 { 4371 PetscErrorCode ierr; 4372 PetscInt m,N,i,rstart,nnz,Ii; 4373 PetscInt *indx; 4374 PetscScalar *values; 4375 4376 PetscFunctionBegin; 4377 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4378 ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr); 4379 for (i=0; i<m; i++) { 4380 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4381 Ii = i + rstart; 4382 ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4383 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4384 } 4385 ierr = 
MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4386 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4387 PetscFunctionReturn(0); 4388 } 4389 4390 #undef __FUNCT__ 4391 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4392 /*@ 4393 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4394 matrices from each processor 4395 4396 Collective on MPI_Comm 4397 4398 Input Parameters: 4399 + comm - the communicators the parallel matrix will live on 4400 . inmat - the input sequential matrices 4401 . n - number of local columns (or PETSC_DECIDE) 4402 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4403 4404 Output Parameter: 4405 . outmat - the parallel matrix generated 4406 4407 Level: advanced 4408 4409 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4410 4411 @*/ 4412 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4413 { 4414 PetscErrorCode ierr; 4415 PetscMPIInt size; 4416 4417 PetscFunctionBegin; 4418 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4419 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4420 if (size == 1) { 4421 if (scall == MAT_INITIAL_MATRIX) { 4422 ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr); 4423 } else { 4424 ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4425 } 4426 } else { 4427 if (scall == MAT_INITIAL_MATRIX) { 4428 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4429 } 4430 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4431 } 4432 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4433 PetscFunctionReturn(0); 4434 } 4435 4436 #undef __FUNCT__ 4437 #define __FUNCT__ "MatFileSplit" 4438 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4439 { 4440 PetscErrorCode ierr; 4441 PetscMPIInt rank; 4442 PetscInt m,N,i,rstart,nnz; 4443 size_t len; 4444 const PetscInt *indx; 4445 PetscViewer out; 4446 char *name; 4447 Mat B; 4448 const PetscScalar *values; 4449 4450 PetscFunctionBegin; 4451 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4452 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4453 /* Should this be the type of the diagonal block of A? 
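For now each process simply writes its locally owned rows out as a MATSEQAIJ matrix to its own file.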
*/ 4454 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4455 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4456 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4457 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4458 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4459 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4460 for (i=0; i<m; i++) { 4461 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4462 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4463 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4464 } 4465 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4466 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4467 4468 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4469 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4470 ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr); 4471 sprintf(name,"%s.%d",outfile,rank); 4472 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4473 ierr = PetscFree(name);CHKERRQ(ierr); 4474 ierr = MatView(B,out);CHKERRQ(ierr); 4475 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4476 ierr = MatDestroy(&B);CHKERRQ(ierr); 4477 PetscFunctionReturn(0); 4478 } 4479 4480 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4481 #undef __FUNCT__ 4482 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4483 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4484 { 4485 PetscErrorCode ierr; 4486 Mat_Merge_SeqsToMPI *merge; 4487 PetscContainer container; 4488 4489 PetscFunctionBegin; 4490 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4491 if (container) { 4492 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4493 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4494 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4495 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4496 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4497 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4498 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4499 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4500 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4501 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4502 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4503 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4504 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4505 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4506 ierr = PetscFree(merge);CHKERRQ(ierr); 4507 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4508 } 4509 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4510 PetscFunctionReturn(0); 4511 } 4512 4513 #include <../src/mat/utils/freespace.h> 4514 #include <petscbt.h> 4515 4516 #undef __FUNCT__ 4517 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4518 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4519 { 4520 PetscErrorCode ierr; 4521 MPI_Comm comm; 4522 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4523 PetscMPIInt size,rank,taga,*len_s; 4524 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4525 PetscInt proc,m; 4526 PetscInt **buf_ri,**buf_rj; 4527 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4528 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4529 MPI_Request *s_waits,*r_waits; 4530 MPI_Status *status; 4531 MatScalar *aa=a->a; 4532 MatScalar **abuf_r,*ba_i; 4533 Mat_Merge_SeqsToMPI *merge; 4534 PetscContainer container; 4535 4536 PetscFunctionBegin; 4537 ierr = 
PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4538 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4539 4540 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4541 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4542 4543 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4544 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4545 4546 bi = merge->bi; 4547 bj = merge->bj; 4548 buf_ri = merge->buf_ri; 4549 buf_rj = merge->buf_rj; 4550 4551 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4552 owners = merge->rowmap->range; 4553 len_s = merge->len_s; 4554 4555 /* send and recv matrix values */ 4556 /*-----------------------------*/ 4557 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4558 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4559 4560 ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr); 4561 for (proc=0,k=0; proc<size; proc++) { 4562 if (!len_s[proc]) continue; 4563 i = owners[proc]; 4564 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4565 k++; 4566 } 4567 4568 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4569 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4570 ierr = PetscFree(status);CHKERRQ(ierr); 4571 4572 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4573 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4574 4575 /* insert mat values of mpimat */ 4576 /*----------------------------*/ 4577 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4578 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4579 4580 for (k=0; k<merge->nrecv; k++) { 4581 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4582 nrows = *(buf_ri_k[k]); 4583 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4584 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4585 } 4586 4587 /* set values of ba */ 4588 m = merge->rowmap->n; 4589 for (i=0; i<m; i++) { 4590 arow = owners[rank] + i; 4591 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4592 bnzi = bi[i+1] - bi[i]; 4593 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4594 4595 /* add local non-zero vals of this proc's seqmat into ba */ 4596 anzi = ai[arow+1] - ai[arow]; 4597 aj = a->j + ai[arow]; 4598 aa = a->a + ai[arow]; 4599 nextaj = 0; 4600 for (j=0; nextaj<anzi; j++) { 4601 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4602 ba_i[j] += aa[nextaj++]; 4603 } 4604 } 4605 4606 /* add received vals into ba */ 4607 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4608 /* i-th row */ 4609 if (i == *nextrow[k]) { 4610 anzi = *(nextai[k]+1) - *nextai[k]; 4611 aj = buf_rj[k] + *(nextai[k]); 4612 aa = abuf_r[k] + *(nextai[k]); 4613 nextaj = 0; 4614 for (j=0; nextaj<anzi; j++) { 4615 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4616 ba_i[j] += aa[nextaj++]; 4617 } 4618 } 4619 nextrow[k]++; nextai[k]++; 4620 } 4621 } 4622 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4623 } 4624 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4625 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4626 4627 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4628 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4629 ierr = 
PetscFree(ba_i);CHKERRQ(ierr); 4630 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4631 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4632 PetscFunctionReturn(0); 4633 } 4634 4635 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4636 4637 #undef __FUNCT__ 4638 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4639 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4640 { 4641 PetscErrorCode ierr; 4642 Mat B_mpi; 4643 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4644 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4645 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4646 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4647 PetscInt len,proc,*dnz,*onz,bs,cbs; 4648 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4649 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4650 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4651 MPI_Status *status; 4652 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4653 PetscBT lnkbt; 4654 Mat_Merge_SeqsToMPI *merge; 4655 PetscContainer container; 4656 4657 PetscFunctionBegin; 4658 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4659 4660 /* make sure it is a PETSc comm */ 4661 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4662 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4663 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4664 4665 ierr = PetscNew(&merge);CHKERRQ(ierr); 4666 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4667 4668 /* determine row ownership */ 4669 /*---------------------------------------------------------*/ 4670 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4671 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4672 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4673 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4674 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4675 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4676 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4677 4678 m = merge->rowmap->n; 4679 owners = merge->rowmap->range; 4680 4681 /* determine the number of messages to send, their lengths */ 4682 /*---------------------------------------------------------*/ 4683 len_s = merge->len_s; 4684 4685 len = 0; /* length of buf_si[] */ 4686 merge->nsend = 0; 4687 for (proc=0; proc<size; proc++) { 4688 len_si[proc] = 0; 4689 if (proc == rank) { 4690 len_s[proc] = 0; 4691 } else { 4692 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4693 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4694 } 4695 if (len_s[proc]) { 4696 merge->nsend++; 4697 nrows = 0; 4698 for (i=owners[proc]; i<owners[proc+1]; i++) { 4699 if (ai[i+1] > ai[i]) nrows++; 4700 } 4701 len_si[proc] = 2*(nrows+1); 4702 len += len_si[proc]; 4703 } 4704 } 4705 4706 /* determine the number and length of messages to receive for ij-structure */ 4707 /*-------------------------------------------------------------------------*/ 4708 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4709 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4710 4711 /* post the Irecv of j-structure */ 4712 /*-------------------------------*/ 4713 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4714 ierr = 
PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4715 4716 /* post the Isend of j-structure */ 4717 /*--------------------------------*/ 4718 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4719 4720 for (proc=0, k=0; proc<size; proc++) { 4721 if (!len_s[proc]) continue; 4722 i = owners[proc]; 4723 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4724 k++; 4725 } 4726 4727 /* receives and sends of j-structure are complete */ 4728 /*------------------------------------------------*/ 4729 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4730 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4731 4732 /* send and recv i-structure */ 4733 /*---------------------------*/ 4734 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4735 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4736 4737 ierr = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr); 4738 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4739 for (proc=0,k=0; proc<size; proc++) { 4740 if (!len_s[proc]) continue; 4741 /* form outgoing message for i-structure: 4742 buf_si[0]: nrows to be sent 4743 [1:nrows]: row index (global) 4744 [nrows+1:2*nrows+1]: i-structure index 4745 */ 4746 /*-------------------------------------------*/ 4747 nrows = len_si[proc]/2 - 1; 4748 buf_si_i = buf_si + nrows+1; 4749 buf_si[0] = nrows; 4750 buf_si_i[0] = 0; 4751 nrows = 0; 4752 for (i=owners[proc]; i<owners[proc+1]; i++) { 4753 anzi = ai[i+1] - ai[i]; 4754 if (anzi) { 4755 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4756 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4757 nrows++; 4758 } 4759 } 4760 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4761 k++; 4762 buf_si += len_si[proc]; 4763 } 4764 4765 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4766 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4767 4768 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4769 for (i=0; i<merge->nrecv; i++) { 4770 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4771 } 4772 4773 ierr = PetscFree(len_si);CHKERRQ(ierr); 4774 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4775 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4776 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4777 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4778 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4779 ierr = PetscFree(status);CHKERRQ(ierr); 4780 4781 /* compute a local seq matrix in each processor */ 4782 /*----------------------------------------------*/ 4783 /* allocate bi array and free space for accumulating nonzero column info */ 4784 ierr = PetscMalloc1((m+1),&bi);CHKERRQ(ierr); 4785 bi[0] = 0; 4786 4787 /* create and initialize a linked list */ 4788 nlnk = N+1; 4789 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4790 4791 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4792 len = ai[owners[rank+1]] - ai[owners[rank]]; 4793 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4794 4795 current_space = free_space; 4796 4797 /* determine symbolic info for each local row */ 4798 ierr = 
PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4799 4800 for (k=0; k<merge->nrecv; k++) { 4801 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4802 nrows = *buf_ri_k[k]; 4803 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4804 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4805 } 4806 4807 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4808 len = 0; 4809 for (i=0; i<m; i++) { 4810 bnzi = 0; 4811 /* add local non-zero cols of this proc's seqmat into lnk */ 4812 arow = owners[rank] + i; 4813 anzi = ai[arow+1] - ai[arow]; 4814 aj = a->j + ai[arow]; 4815 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4816 bnzi += nlnk; 4817 /* add received col data into lnk */ 4818 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4819 if (i == *nextrow[k]) { /* i-th row */ 4820 anzi = *(nextai[k]+1) - *nextai[k]; 4821 aj = buf_rj[k] + *nextai[k]; 4822 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4823 bnzi += nlnk; 4824 nextrow[k]++; nextai[k]++; 4825 } 4826 } 4827 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4828 4829 /* if free space is not available, make more free space */ 4830 if (current_space->local_remaining<bnzi) { 4831 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4832 nspacedouble++; 4833 } 4834 /* copy data into free space, then initialize lnk */ 4835 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4836 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4837 4838 current_space->array += bnzi; 4839 current_space->local_used += bnzi; 4840 current_space->local_remaining -= bnzi; 4841 4842 bi[i+1] = bi[i] + bnzi; 4843 } 4844 4845 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4846 4847 ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr); 4848 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4849 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4850 4851 /* create symbolic parallel matrix B_mpi */ 4852 /*---------------------------------------*/ 4853 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4854 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4855 if (n==PETSC_DECIDE) { 4856 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4857 } else { 4858 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4859 } 4860 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4861 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4862 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4863 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4864 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4865 4866 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4867 B_mpi->assembled = PETSC_FALSE; 4868 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4869 merge->bi = bi; 4870 merge->bj = bj; 4871 merge->buf_ri = buf_ri; 4872 merge->buf_rj = buf_rj; 4873 merge->coi = NULL; 4874 merge->coj = NULL; 4875 merge->owners_co = NULL; 4876 4877 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4878 4879 /* attach the supporting struct to B_mpi for reuse */ 4880 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4881 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4882 ierr = 
PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4883 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4884 *mpimat = B_mpi; 4885 4886 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4887 PetscFunctionReturn(0); 4888 } 4889 4890 #undef __FUNCT__ 4891 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4892 /*@C 4893 MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential 4894 matrices from each processor 4895 4896 Collective on MPI_Comm 4897 4898 Input Parameters: 4899 + comm - the communicators the parallel matrix will live on 4900 . seqmat - the input sequential matrices 4901 . m - number of local rows (or PETSC_DECIDE) 4902 . n - number of local columns (or PETSC_DECIDE) 4903 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4904 4905 Output Parameter: 4906 . mpimat - the parallel matrix generated 4907 4908 Level: advanced 4909 4910 Notes: 4911 The dimensions of the sequential matrix in each processor MUST be the same. 4912 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4913 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4914 @*/ 4915 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4916 { 4917 PetscErrorCode ierr; 4918 PetscMPIInt size; 4919 4920 PetscFunctionBegin; 4921 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4922 if (size == 1) { 4923 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4924 if (scall == MAT_INITIAL_MATRIX) { 4925 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4926 } else { 4927 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4928 } 4929 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4930 PetscFunctionReturn(0); 4931 } 4932 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4933 if (scall == MAT_INITIAL_MATRIX) { 4934 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4935 } 4936 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4937 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4938 PetscFunctionReturn(0); 4939 } 4940 4941 #undef __FUNCT__ 4942 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4943 /*@ 4944 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with 4945 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4946 with MatGetSize() 4947 4948 Not Collective 4949 4950 Input Parameters: 4951 + A - the matrix 4952 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4953 4954 Output Parameter: 4955 . 
A_loc - the local sequential matrix generated 4956 4957 Level: developer 4958 4959 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4960 4961 @*/ 4962 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4963 { 4964 PetscErrorCode ierr; 4965 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4966 Mat_SeqAIJ *mat,*a,*b; 4967 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4968 MatScalar *aa,*ba,*cam; 4969 PetscScalar *ca; 4970 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4971 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4972 PetscBool match; 4973 4974 PetscFunctionBegin; 4975 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4976 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4977 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4978 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4979 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4980 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4981 aa = a->a; ba = b->a; 4982 if (scall == MAT_INITIAL_MATRIX) { 4983 ierr = PetscMalloc1((1+am),&ci);CHKERRQ(ierr); 4984 ci[0] = 0; 4985 for (i=0; i<am; i++) { 4986 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4987 } 4988 ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr); 4989 ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr); 4990 k = 0; 4991 for (i=0; i<am; i++) { 4992 ncols_o = bi[i+1] - bi[i]; 4993 ncols_d = ai[i+1] - ai[i]; 4994 /* off-diagonal portion of A */ 4995 for (jo=0; jo<ncols_o; jo++) { 4996 col = cmap[*bj]; 4997 if (col >= cstart) break; 4998 cj[k] = col; bj++; 4999 ca[k++] = *ba++; 5000 } 5001 /* diagonal portion of A */ 5002 for (j=0; j<ncols_d; j++) { 5003 cj[k] = cstart + *aj++; 5004 ca[k++] = *aa++; 5005 } 5006 /* off-diagonal portion of A */ 5007 for (j=jo; j<ncols_o; j++) { 5008 cj[k] = cmap[*bj++]; 5009 ca[k++] = *ba++; 5010 } 5011 } 5012 /* put together the new matrix */ 5013 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5014 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5015 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5016 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5017 mat->free_a = PETSC_TRUE; 5018 mat->free_ij = PETSC_TRUE; 5019 mat->nonew = 0; 5020 } else if (scall == MAT_REUSE_MATRIX) { 5021 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5022 ci = mat->i; cj = mat->j; cam = mat->a; 5023 for (i=0; i<am; i++) { 5024 /* off-diagonal portion of A */ 5025 ncols_o = bi[i+1] - bi[i]; 5026 for (jo=0; jo<ncols_o; jo++) { 5027 col = cmap[*bj]; 5028 if (col >= cstart) break; 5029 *cam++ = *ba++; bj++; 5030 } 5031 /* diagonal portion of A */ 5032 ncols_d = ai[i+1] - ai[i]; 5033 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5034 /* off-diagonal portion of A */ 5035 for (j=jo; j<ncols_o; j++) { 5036 *cam++ = *ba++; bj++; 5037 } 5038 } 5039 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5040 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5041 PetscFunctionReturn(0); 5042 } 5043 5044 #undef __FUNCT__ 5045 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5046 /*@C 5047 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5048 5049 Not Collective 5050 5051 Input Parameters: 5052 + A - the matrix 5053 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5054 - row, col - index sets of rows and columns to extract (or NULL) 5055 5056 Output Parameter: 5057 . 
A_loc - the local sequential matrix generated 5058 5059 Level: developer 5060 5061 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5062 5063 @*/ 5064 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5065 { 5066 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5067 PetscErrorCode ierr; 5068 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5069 IS isrowa,iscola; 5070 Mat *aloc; 5071 PetscBool match; 5072 5073 PetscFunctionBegin; 5074 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5075 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5076 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5077 if (!row) { 5078 start = A->rmap->rstart; end = A->rmap->rend; 5079 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5080 } else { 5081 isrowa = *row; 5082 } 5083 if (!col) { 5084 start = A->cmap->rstart; 5085 cmap = a->garray; 5086 nzA = a->A->cmap->n; 5087 nzB = a->B->cmap->n; 5088 ierr = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr); 5089 ncols = 0; 5090 for (i=0; i<nzB; i++) { 5091 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5092 else break; 5093 } 5094 imark = i; 5095 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5096 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5097 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5098 } else { 5099 iscola = *col; 5100 } 5101 if (scall != MAT_INITIAL_MATRIX) { 5102 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5103 aloc[0] = *A_loc; 5104 } 5105 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5106 *A_loc = aloc[0]; 5107 ierr = PetscFree(aloc);CHKERRQ(ierr); 5108 if (!row) { 5109 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5110 } 5111 if (!col) { 5112 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5113 } 5114 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5115 PetscFunctionReturn(0); 5116 } 5117 5118 #undef __FUNCT__ 5119 #define __FUNCT__ "MatGetBrowsOfAcols" 5120 /*@C 5121 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5122 5123 Collective on Mat 5124 5125 Input Parameters: 5126 + A,B - the matrices in mpiaij format 5127 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5128 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5129 5130 Output Parameter: 5131 + rowb, colb - index sets of rows and columns of B to extract 5132 - B_seq - the sequential matrix generated 5133 5134 Level: developer 5135 5136 @*/ 5137 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5138 { 5139 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5140 PetscErrorCode ierr; 5141 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5142 IS isrowb,iscolb; 5143 Mat *bseq=NULL; 5144 5145 PetscFunctionBegin; 5146 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5147 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5148 } 5149 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5150 5151 if (scall == MAT_INITIAL_MATRIX) { 5152 start = A->cmap->rstart; 5153 cmap = a->garray; 5154 nzA = a->A->cmap->n; 5155 nzB = a->B->cmap->n; 5156 ierr = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr); 5157 ncols = 0; 5158 for (i=0; i<nzB; i++) { /* row < local row index */ 5159 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5160 else break; 5161 } 5162 imark = i; 5163 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5164 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5165 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5166 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5167 } else { 5168 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5169 isrowb = *rowb; iscolb = *colb; 5170 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5171 bseq[0] = *B_seq; 5172 } 5173 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5174 *B_seq = bseq[0]; 5175 ierr = PetscFree(bseq);CHKERRQ(ierr); 5176 if (!rowb) { 5177 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5178 } else { 5179 *rowb = isrowb; 5180 } 5181 if (!colb) { 5182 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5183 } else { 5184 *colb = iscolb; 5185 } 5186 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5187 PetscFunctionReturn(0); 5188 } 5189 5190 #undef __FUNCT__ 5191 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5192 /* 5193 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5194 of the OFF-DIAGONAL portion of local A 5195 5196 Collective on Mat 5197 5198 Input Parameters: 5199 + A,B - the matrices in mpiaij format 5200 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5201 5202 Output Parameter: 5203 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5204 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5205 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5206 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5207 5208 Level: developer 5209 5210 */ 5211 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5212 { 5213 VecScatter_MPI_General *gen_to,*gen_from; 5214 PetscErrorCode ierr; 5215 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5216 Mat_SeqAIJ *b_oth; 5217 VecScatter ctx =a->Mvctx; 5218 MPI_Comm comm; 5219 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5220 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5221 PetscScalar *rvalues,*svalues; 5222 MatScalar *b_otha,*bufa,*bufA; 5223 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5224 MPI_Request *rwaits = NULL,*swaits = NULL; 5225 MPI_Status *sstatus,rstatus; 5226 PetscMPIInt jj; 5227 PetscInt *cols,sbs,rbs; 5228 PetscScalar *vals; 5229 5230 PetscFunctionBegin; 5231 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5232 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5233 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5234 } 5235 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5236 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5237 5238 gen_to = (VecScatter_MPI_General*)ctx->todata; 5239 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5240 rvalues = gen_from->values; /* holds the length of receiving row */ 5241 svalues = gen_to->values; /* holds the length of sending row */ 5242 nrecvs = gen_from->n; 5243 nsends = gen_to->n; 5244 5245 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5246 srow = gen_to->indices; /* local row index to be sent */ 5247 sstarts = gen_to->starts; 5248 sprocs = gen_to->procs; 5249 sstatus = gen_to->sstatus; 5250 sbs = gen_to->bs; 5251 rstarts = gen_from->starts; 5252 rprocs = gen_from->procs; 5253 rbs = gen_from->bs; 5254 5255 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5256 if (scall == MAT_INITIAL_MATRIX) { 5257 /* i-array */ 5258 /*---------*/ 5259 /* post receives */ 5260 for (i=0; i<nrecvs; i++) { 5261 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5262 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5263 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5264 } 5265 5266 /* pack the outgoing message */ 5267 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5268 5269 sstartsj[0] = 0; 5270 rstartsj[0] = 0; 5271 len = 0; /* total length of j or a array to be sent */ 5272 k = 0; 5273 for (i=0; i<nsends; i++) { 5274 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5275 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5276 for (j=0; j<nrows; j++) { 5277 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5278 for (l=0; l<sbs; l++) { 5279 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5280 5281 rowlen[j*sbs+l] = ncols; 5282 5283 len += ncols; 5284 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5285 } 5286 k++; 5287 } 5288 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5289 5290 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5291 } 5292 /* recvs and 
sends of i-array are completed */ 5293 i = nrecvs; 5294 while (i--) { 5295 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5296 } 5297 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5298 5299 /* allocate buffers for sending j and a arrays */ 5300 ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr); 5301 ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr); 5302 5303 /* create i-array of B_oth */ 5304 ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr); 5305 5306 b_othi[0] = 0; 5307 len = 0; /* total length of j or a array to be received */ 5308 k = 0; 5309 for (i=0; i<nrecvs; i++) { 5310 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5311 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5312 for (j=0; j<nrows; j++) { 5313 b_othi[k+1] = b_othi[k] + rowlen[j]; 5314 len += rowlen[j]; k++; 5315 } 5316 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5317 } 5318 5319 /* allocate space for j and a arrrays of B_oth */ 5320 ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr); 5321 ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr); 5322 5323 /* j-array */ 5324 /*---------*/ 5325 /* post receives of j-array */ 5326 for (i=0; i<nrecvs; i++) { 5327 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5328 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5329 } 5330 5331 /* pack the outgoing message j-array */ 5332 k = 0; 5333 for (i=0; i<nsends; i++) { 5334 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5335 bufJ = bufj+sstartsj[i]; 5336 for (j=0; j<nrows; j++) { 5337 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5338 for (ll=0; ll<sbs; ll++) { 5339 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5340 for (l=0; l<ncols; l++) { 5341 *bufJ++ = cols[l]; 5342 } 5343 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5344 } 5345 } 5346 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5347 } 5348 5349 /* recvs and sends of j-array are completed */ 5350 i = nrecvs; 5351 while (i--) { 5352 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5353 } 5354 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5355 } else if (scall == MAT_REUSE_MATRIX) { 5356 sstartsj = *startsj_s; 5357 rstartsj = *startsj_r; 5358 bufa = *bufa_ptr; 5359 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5360 b_otha = b_oth->a; 5361 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5362 5363 /* a-array */ 5364 /*---------*/ 5365 /* post receives of a-array */ 5366 for (i=0; i<nrecvs; i++) { 5367 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5368 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5369 } 5370 5371 /* pack the outgoing message a-array */ 5372 k = 0; 5373 for (i=0; i<nsends; i++) { 5374 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5375 bufA = bufa+sstartsj[i]; 5376 for (j=0; j<nrows; j++) { 5377 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5378 for (ll=0; ll<sbs; ll++) { 5379 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5380 for (l=0; l<ncols; l++) { 5381 *bufA++ = vals[l]; 5382 } 5383 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5384 } 5385 } 5386 ierr = 
MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5387 } 5388 /* recvs and sends of a-array are completed */ 5389 i = nrecvs; 5390 while (i--) { 5391 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5392 } 5393 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5394 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5395 5396 if (scall == MAT_INITIAL_MATRIX) { 5397 /* put together the new matrix */ 5398 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5399 5400 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5401 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5402 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5403 b_oth->free_a = PETSC_TRUE; 5404 b_oth->free_ij = PETSC_TRUE; 5405 b_oth->nonew = 0; 5406 5407 ierr = PetscFree(bufj);CHKERRQ(ierr); 5408 if (!startsj_s || !bufa_ptr) { 5409 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5410 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5411 } else { 5412 *startsj_s = sstartsj; 5413 *startsj_r = rstartsj; 5414 *bufa_ptr = bufa; 5415 } 5416 } 5417 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5418 PetscFunctionReturn(0); 5419 } 5420 5421 #undef __FUNCT__ 5422 #define __FUNCT__ "MatGetCommunicationStructs" 5423 /*@C 5424 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5425 5426 Not Collective 5427 5428 Input Parameters: 5429 . A - The matrix in mpiaij format 5430 5431 Output Parameter: 5432 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5433 . colmap - A map from global column index to local index into lvec 5434 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5435 5436 Level: developer 5437 5438 @*/ 5439 #if defined(PETSC_USE_CTABLE) 5440 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5441 #else 5442 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5443 #endif 5444 { 5445 Mat_MPIAIJ *a; 5446 5447 PetscFunctionBegin; 5448 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5449 PetscValidPointer(lvec, 2); 5450 PetscValidPointer(colmap, 3); 5451 PetscValidPointer(multScatter, 4); 5452 a = (Mat_MPIAIJ*) A->data; 5453 if (lvec) *lvec = a->lvec; 5454 if (colmap) *colmap = a->colmap; 5455 if (multScatter) *multScatter = a->Mvctx; 5456 PetscFunctionReturn(0); 5457 } 5458 5459 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5460 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5461 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5462 5463 #undef __FUNCT__ 5464 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5465 /* 5466 Computes (B'*A')' since computing B*A directly is untenable 5467 5468 n p p 5469 ( ) ( ) ( ) 5470 m ( A ) * n ( B ) = m ( C ) 5471 ( ) ( ) ( ) 5472 5473 */ 5474 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5475 { 5476 PetscErrorCode ierr; 5477 Mat At,Bt,Ct; 5478 5479 PetscFunctionBegin; 5480 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5481 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5482 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5483 ierr = MatDestroy(&At);CHKERRQ(ierr); 
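  /* Ct = Bt*At = (A*B)^T; free the remaining transposed operand, then transpose Ct back into the caller's C */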
5484 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5485 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5486 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5487 PetscFunctionReturn(0); 5488 } 5489 5490 #undef __FUNCT__ 5491 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5492 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5493 { 5494 PetscErrorCode ierr; 5495 PetscInt m=A->rmap->n,n=B->cmap->n; 5496 Mat Cmat; 5497 5498 PetscFunctionBegin; 5499 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5500 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5501 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5502 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5503 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5504 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5505 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5506 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5507 5508 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5509 5510 *C = Cmat; 5511 PetscFunctionReturn(0); 5512 } 5513 5514 /* ----------------------------------------------------------------*/ 5515 #undef __FUNCT__ 5516 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5517 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5518 { 5519 PetscErrorCode ierr; 5520 5521 PetscFunctionBegin; 5522 if (scall == MAT_INITIAL_MATRIX) { 5523 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5524 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5525 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5526 } 5527 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5528 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5529 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5530 PetscFunctionReturn(0); 5531 } 5532 5533 #if defined(PETSC_HAVE_MUMPS) 5534 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5535 #endif 5536 #if defined(PETSC_HAVE_PASTIX) 5537 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5538 #endif 5539 #if defined(PETSC_HAVE_SUPERLU_DIST) 5540 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5541 #endif 5542 #if defined(PETSC_HAVE_CLIQUE) 5543 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5544 #endif 5545 5546 /*MC 5547 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5548 5549 Options Database Keys: 5550 . 
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5551 5552 Level: beginner 5553 5554 .seealso: MatCreateAIJ() 5555 M*/ 5556 5557 #undef __FUNCT__ 5558 #define __FUNCT__ "MatCreate_MPIAIJ" 5559 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5560 { 5561 Mat_MPIAIJ *b; 5562 PetscErrorCode ierr; 5563 PetscMPIInt size; 5564 5565 PetscFunctionBegin; 5566 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5567 5568 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5569 B->data = (void*)b; 5570 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5571 B->assembled = PETSC_FALSE; 5572 B->insertmode = NOT_SET_VALUES; 5573 b->size = size; 5574 5575 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5576 5577 /* build cache for off array entries formed */ 5578 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5579 5580 b->donotstash = PETSC_FALSE; 5581 b->colmap = 0; 5582 b->garray = 0; 5583 b->roworiented = PETSC_TRUE; 5584 5585 /* stuff used for matrix vector multiply */ 5586 b->lvec = NULL; 5587 b->Mvctx = NULL; 5588 5589 /* stuff for MatGetRow() */ 5590 b->rowindices = 0; 5591 b->rowvalues = 0; 5592 b->getrowactive = PETSC_FALSE; 5593 5594 /* flexible pointer used in CUSP/CUSPARSE classes */ 5595 b->spptr = NULL; 5596 5597 #if defined(PETSC_HAVE_MUMPS) 5598 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr); 5599 #endif 5600 #if defined(PETSC_HAVE_PASTIX) 5601 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5602 #endif 5603 #if defined(PETSC_HAVE_SUPERLU_DIST) 5604 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5605 #endif 5606 #if defined(PETSC_HAVE_CLIQUE) 5607 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr); 5608 #endif 5609 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5610 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5611 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5612 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5613 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5614 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5615 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5616 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5617 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5618 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5619 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5620 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5621 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5622 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5623 PetscFunctionReturn(0); 5624 } 5625 5626 #undef __FUNCT__ 5627 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5628 /*@ 5629 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5630 and "off-diagonal" part of the matrix in CSR format. 5631 5632 Collective on MPI_Comm 5633 5634 Input Parameters: 5635 + comm - MPI communicator 5636 . m - number of local rows (Cannot be PETSC_DECIDE) 5637 . n - This value should be the same as the local size used in creating the 5638 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5639 calculated if N is given) For square matrices n is almost always m. 5640 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5641 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5642 . i - row indices for "diagonal" portion of matrix 5643 . j - column indices 5644 . a - matrix values 5645 . oi - row indices for "off-diagonal" portion of matrix 5646 . oj - column indices 5647 - oa - matrix values 5648 5649 Output Parameter: 5650 . mat - the matrix 5651 5652 Level: advanced 5653 5654 Notes: 5655 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5656 must free the arrays once the matrix has been destroyed and not before. 5657 5658 The i and j indices are 0 based 5659 5660 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5661 5662 This sets local rows and cannot be used to set off-processor values. 5663 5664 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5665 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5666 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5667 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5668 keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5669 communication if it is known that only local entries will be set. 
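   Example usage:
   The following is a minimal sketch (not taken from an existing PETSc example) illustrating the split CSR format.
   Each process contributes one local row (m = 1, n = 1): i, j, and a describe a diagonal block holding a single
   entry, while oi describes an empty off-diagonal block (oj and oa are dummy one-element arrays that are never
   read). Because the arrays are not copied, they must remain valid until the matrix is destroyed.
.vb
      PetscInt    i[2]  = {0,1},j[1] = {0};
      PetscScalar a[1]  = {2.0};
      PetscInt    oi[2] = {0,0},oj[1] = {0};
      PetscScalar oa[1] = {0.0};
      Mat         A;

      ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
      ...
      ierr = MatDestroy(&A);CHKERRQ(ierr);
.ve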
5670 5671 .keywords: matrix, aij, compressed row, sparse, parallel 5672 5673 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5674 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5675 @*/ 5676 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5677 { 5678 PetscErrorCode ierr; 5679 Mat_MPIAIJ *maij; 5680 5681 PetscFunctionBegin; 5682 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5683 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5684 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5685 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5686 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5687 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5688 maij = (Mat_MPIAIJ*) (*mat)->data; 5689 5690 (*mat)->preallocated = PETSC_TRUE; 5691 5692 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5693 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5694 5695 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5696 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5697 5698 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5699 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5700 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5701 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5702 5703 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5704 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5705 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5706 PetscFunctionReturn(0); 5707 } 5708 5709 /* 5710 Special version for direct calls from Fortran 5711 */ 5712 #include <petsc-private/fortranimpl.h> 5713 5714 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5715 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5716 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5717 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5718 #endif 5719 5720 /* Change these macros so can be used in void function */ 5721 #undef CHKERRQ 5722 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5723 #undef SETERRQ2 5724 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5725 #undef SETERRQ3 5726 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5727 #undef SETERRQ 5728 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5729 5730 #undef __FUNCT__ 5731 #define __FUNCT__ "matsetvaluesmpiaij_" 5732 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5733 { 5734 Mat mat = *mmat; 5735 PetscInt m = *mm, n = *mn; 5736 InsertMode addv = *maddv; 5737 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5738 PetscScalar value; 5739 PetscErrorCode ierr; 5740 5741 MatCheckPreallocated(mat,1); 5742 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5743 5744 #if defined(PETSC_USE_DEBUG) 5745 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5746 #endif 5747 { 5748 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5749 
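    /* column ownership range of the diagonal block; entries whose column falls outside [cstart,cend) belong to the off-diagonal block B */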
PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5750 PetscBool roworiented = aij->roworiented; 5751 5752 /* Some Variables required in the macro */ 5753 Mat A = aij->A; 5754 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5755 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5756 MatScalar *aa = a->a; 5757 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5758 Mat B = aij->B; 5759 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5760 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5761 MatScalar *ba = b->a; 5762 5763 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5764 PetscInt nonew = a->nonew; 5765 MatScalar *ap1,*ap2; 5766 5767 PetscFunctionBegin; 5768 for (i=0; i<m; i++) { 5769 if (im[i] < 0) continue; 5770 #if defined(PETSC_USE_DEBUG) 5771 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5772 #endif 5773 if (im[i] >= rstart && im[i] < rend) { 5774 row = im[i] - rstart; 5775 lastcol1 = -1; 5776 rp1 = aj + ai[row]; 5777 ap1 = aa + ai[row]; 5778 rmax1 = aimax[row]; 5779 nrow1 = ailen[row]; 5780 low1 = 0; 5781 high1 = nrow1; 5782 lastcol2 = -1; 5783 rp2 = bj + bi[row]; 5784 ap2 = ba + bi[row]; 5785 rmax2 = bimax[row]; 5786 nrow2 = bilen[row]; 5787 low2 = 0; 5788 high2 = nrow2; 5789 5790 for (j=0; j<n; j++) { 5791 if (roworiented) value = v[i*n+j]; 5792 else value = v[i+j*m]; 5793 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5794 if (in[j] >= cstart && in[j] < cend) { 5795 col = in[j] - cstart; 5796 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5797 } else if (in[j] < 0) continue; 5798 #if defined(PETSC_USE_DEBUG) 5799 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5800 #endif 5801 else { 5802 if (mat->was_assembled) { 5803 if (!aij->colmap) { 5804 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5805 } 5806 #if defined(PETSC_USE_CTABLE) 5807 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5808 col--; 5809 #else 5810 col = aij->colmap[in[j]] - 1; 5811 #endif 5812 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5813 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5814 col = in[j]; 5815 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5816 B = aij->B; 5817 b = (Mat_SeqAIJ*)B->data; 5818 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5819 rp2 = bj + bi[row]; 5820 ap2 = ba + bi[row]; 5821 rmax2 = bimax[row]; 5822 nrow2 = bilen[row]; 5823 low2 = 0; 5824 high2 = nrow2; 5825 bm = aij->B->rmap->n; 5826 ba = b->a; 5827 } 5828 } else col = in[j]; 5829 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5830 } 5831 } 5832 } else if (!aij->donotstash) { 5833 if (roworiented) { 5834 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5835 } else { 5836 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5837 } 5838 } 5839 } 5840 } 5841 PetscFunctionReturnVoid(); 5842 } 5843 5844