#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc-private/vecimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also
   automatically switches over to use inodes when enough of them exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/
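/*
   Illustrative usage sketch (added commentary, not part of the original source): the manual pages
   above recommend calling both preallocation routines so the same code works on a communicator of
   any size; the routine that does not match the actual matrix type is ignored. The global sizes
   M and N and the per-row estimates (5 diagonal and 2 off-diagonal nonzeros per row) are
   placeholders chosen for illustration only.

     Mat            A;
     PetscErrorCode ierr;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
*/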
#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
178 179 Only for square matrices 180 181 Used by a preconditioner, hence PETSC_EXTERN 182 */ 183 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 184 { 185 PetscMPIInt rank,size; 186 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 187 PetscErrorCode ierr; 188 Mat mat; 189 Mat_SeqAIJ *gmata; 190 PetscMPIInt tag; 191 MPI_Status status; 192 PetscBool aij; 193 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 194 195 PetscFunctionBegin; 196 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 197 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 198 if (!rank) { 199 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 200 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 201 } 202 if (reuse == MAT_INITIAL_MATRIX) { 203 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 204 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 205 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 206 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 207 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 208 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 209 ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr); 210 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 211 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 212 213 rowners[0] = 0; 214 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 215 rstart = rowners[rank]; 216 rend = rowners[rank+1]; 217 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 218 if (!rank) { 219 gmata = (Mat_SeqAIJ*) gmat->data; 220 /* send row lengths to all processors */ 221 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 222 for (i=1; i<size; i++) { 223 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 224 } 225 /* determine number diagonal and off-diagonal counts */ 226 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 227 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 228 jj = 0; 229 for (i=0; i<m; i++) { 230 for (j=0; j<dlens[i]; j++) { 231 if (gmata->j[jj] < rstart) ld[i]++; 232 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 233 jj++; 234 } 235 } 236 /* send column indices to other processes */ 237 for (i=1; i<size; i++) { 238 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 239 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 240 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 241 } 242 243 /* send numerical values to other processes */ 244 for (i=1; i<size; i++) { 245 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 246 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 247 } 248 gmataa = gmata->a; 249 gmataj = gmata->j; 250 251 } else { 252 /* receive row lengths */ 253 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 254 /* receive column indices */ 255 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 256 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 257 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 258 /* determine number diagonal and off-diagonal counts */ 259 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 260 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 261 jj = 0; 262 for (i=0; i<m; i++) { 263 for (j=0; j<dlens[i]; j++) { 264 if 
(gmataj[jj] < rstart) ld[i]++; 265 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 266 jj++; 267 } 268 } 269 /* receive numerical values */ 270 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 271 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 272 } 273 /* set preallocation */ 274 for (i=0; i<m; i++) { 275 dlens[i] -= olens[i]; 276 } 277 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 278 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 279 280 for (i=0; i<m; i++) { 281 dlens[i] += olens[i]; 282 } 283 cnt = 0; 284 for (i=0; i<m; i++) { 285 row = rstart + i; 286 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 287 cnt += dlens[i]; 288 } 289 if (rank) { 290 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 291 } 292 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 293 ierr = PetscFree(rowners);CHKERRQ(ierr); 294 295 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 296 297 *inmat = mat; 298 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 299 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 300 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 301 mat = *inmat; 302 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 303 if (!rank) { 304 /* send numerical values to other processes */ 305 gmata = (Mat_SeqAIJ*) gmat->data; 306 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 307 gmataa = gmata->a; 308 for (i=1; i<size; i++) { 309 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 310 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 311 } 312 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 313 } else { 314 /* receive numerical values from process 0*/ 315 nz = Ad->nz + Ao->nz; 316 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 317 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 318 } 319 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 320 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 321 ad = Ad->a; 322 ao = Ao->a; 323 if (mat->rmap->n) { 324 i = 0; 325 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 326 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 327 } 328 for (i=1; i<mat->rmap->n; i++) { 329 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 330 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 331 } 332 i--; 333 if (mat->rmap->n) { 334 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 335 } 336 if (rank) { 337 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 338 } 339 } 340 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 341 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 342 PetscFunctionReturn(0); 343 } 344 345 /* 346 Local utility routine that creates a mapping from the global column 347 number to the local number in the off-diagonal part of the local 348 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 349 a slightly higher hash table cost; without it it is not scalable (each processor 350 has an order N integer array but is fast to acess. 
351 */ 352 #undef __FUNCT__ 353 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 354 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 355 { 356 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 357 PetscErrorCode ierr; 358 PetscInt n = aij->B->cmap->n,i; 359 360 PetscFunctionBegin; 361 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 362 #if defined(PETSC_USE_CTABLE) 363 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 364 for (i=0; i<n; i++) { 365 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 366 } 367 #else 368 ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr); 369 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 370 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 371 #endif 372 PetscFunctionReturn(0); 373 } 374 375 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 376 { \ 377 if (col <= lastcol1) low1 = 0; \ 378 else high1 = nrow1; \ 379 lastcol1 = col;\ 380 while (high1-low1 > 5) { \ 381 t = (low1+high1)/2; \ 382 if (rp1[t] > col) high1 = t; \ 383 else low1 = t; \ 384 } \ 385 for (_i=low1; _i<high1; _i++) { \ 386 if (rp1[_i] > col) break; \ 387 if (rp1[_i] == col) { \ 388 if (addv == ADD_VALUES) ap1[_i] += value; \ 389 else ap1[_i] = value; \ 390 goto a_noinsert; \ 391 } \ 392 } \ 393 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 394 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 395 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 396 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 397 N = nrow1++ - 1; a->nz++; high1++; \ 398 /* shift up all the later entries in this row */ \ 399 for (ii=N; ii>=_i; ii--) { \ 400 rp1[ii+1] = rp1[ii]; \ 401 ap1[ii+1] = ap1[ii]; \ 402 } \ 403 rp1[_i] = col; \ 404 ap1[_i] = value; \ 405 A->nonzerostate++;\ 406 a_noinsert: ; \ 407 ailen[row] = nrow1; \ 408 } 409 410 411 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 412 { \ 413 if (col <= lastcol2) low2 = 0; \ 414 else high2 = nrow2; \ 415 lastcol2 = col; \ 416 while (high2-low2 > 5) { \ 417 t = (low2+high2)/2; \ 418 if (rp2[t] > col) high2 = t; \ 419 else low2 = t; \ 420 } \ 421 for (_i=low2; _i<high2; _i++) { \ 422 if (rp2[_i] > col) break; \ 423 if (rp2[_i] == col) { \ 424 if (addv == ADD_VALUES) ap2[_i] += value; \ 425 else ap2[_i] = value; \ 426 goto b_noinsert; \ 427 } \ 428 } \ 429 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 430 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 431 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 432 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 433 N = nrow2++ - 1; b->nz++; high2++; \ 434 /* shift up all the later entries in this row */ \ 435 for (ii=N; ii>=_i; ii--) { \ 436 rp2[ii+1] = rp2[ii]; \ 437 ap2[ii+1] = ap2[ii]; \ 438 } \ 439 rp2[_i] = col; \ 440 ap2[_i] = value; \ 441 B->nonzerostate++; \ 442 b_noinsert: ; \ 443 bilen[row] = nrow2; \ 444 } 445 446 #undef __FUNCT__ 447 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 448 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 449 { 450 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 451 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = 
(Mat_SeqAIJ*)mat->B->data; 452 PetscErrorCode ierr; 453 PetscInt l,*garray = mat->garray,diag; 454 455 PetscFunctionBegin; 456 /* code only works for square matrices A */ 457 458 /* find size of row to the left of the diagonal part */ 459 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 460 row = row - diag; 461 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 462 if (garray[b->j[b->i[row]+l]] > diag) break; 463 } 464 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 465 466 /* diagonal part */ 467 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 468 469 /* right of diagonal part */ 470 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 471 PetscFunctionReturn(0); 472 } 473 474 #undef __FUNCT__ 475 #define __FUNCT__ "MatSetValues_MPIAIJ" 476 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 477 { 478 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 479 PetscScalar value; 480 PetscErrorCode ierr; 481 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 482 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 483 PetscBool roworiented = aij->roworiented; 484 485 /* Some Variables required in the macro */ 486 Mat A = aij->A; 487 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 488 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 489 MatScalar *aa = a->a; 490 PetscBool ignorezeroentries = a->ignorezeroentries; 491 Mat B = aij->B; 492 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 493 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 494 MatScalar *ba = b->a; 495 496 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 497 PetscInt nonew; 498 MatScalar *ap1,*ap2; 499 500 PetscFunctionBegin; 501 if (v) PetscValidScalarPointer(v,6); 502 for (i=0; i<m; i++) { 503 if (im[i] < 0) continue; 504 #if defined(PETSC_USE_DEBUG) 505 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 506 #endif 507 if (im[i] >= rstart && im[i] < rend) { 508 row = im[i] - rstart; 509 lastcol1 = -1; 510 rp1 = aj + ai[row]; 511 ap1 = aa + ai[row]; 512 rmax1 = aimax[row]; 513 nrow1 = ailen[row]; 514 low1 = 0; 515 high1 = nrow1; 516 lastcol2 = -1; 517 rp2 = bj + bi[row]; 518 ap2 = ba + bi[row]; 519 rmax2 = bimax[row]; 520 nrow2 = bilen[row]; 521 low2 = 0; 522 high2 = nrow2; 523 524 for (j=0; j<n; j++) { 525 if (v) { 526 if (roworiented) value = v[i*n+j]; 527 else value = v[i+j*m]; 528 } else value = 0.0; 529 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 530 if (in[j] >= cstart && in[j] < cend) { 531 col = in[j] - cstart; 532 nonew = a->nonew; 533 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 534 } else if (in[j] < 0) continue; 535 #if defined(PETSC_USE_DEBUG) 536 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 537 #endif 538 else { 539 if (mat->was_assembled) { 540 if (!aij->colmap) { 541 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 542 } 543 #if defined(PETSC_USE_CTABLE) 544 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 545 col--; 546 #else 547 col = aij->colmap[in[j]] - 1; 548 #endif 549 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 550 ierr 
= MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 551 col = in[j]; 552 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 553 B = aij->B; 554 b = (Mat_SeqAIJ*)B->data; 555 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 556 rp2 = bj + bi[row]; 557 ap2 = ba + bi[row]; 558 rmax2 = bimax[row]; 559 nrow2 = bilen[row]; 560 low2 = 0; 561 high2 = nrow2; 562 bm = aij->B->rmap->n; 563 ba = b->a; 564 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 565 } else col = in[j]; 566 nonew = b->nonew; 567 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 568 } 569 } 570 } else { 571 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 572 if (!aij->donotstash) { 573 mat->assembled = PETSC_FALSE; 574 if (roworiented) { 575 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 576 } else { 577 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 578 } 579 } 580 } 581 } 582 PetscFunctionReturn(0); 583 } 584 585 #undef __FUNCT__ 586 #define __FUNCT__ "MatGetValues_MPIAIJ" 587 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 588 { 589 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 590 PetscErrorCode ierr; 591 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 592 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 593 594 PetscFunctionBegin; 595 for (i=0; i<m; i++) { 596 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 597 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 598 if (idxm[i] >= rstart && idxm[i] < rend) { 599 row = idxm[i] - rstart; 600 for (j=0; j<n; j++) { 601 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 602 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 603 if (idxn[j] >= cstart && idxn[j] < cend) { 604 col = idxn[j] - cstart; 605 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 606 } else { 607 if (!aij->colmap) { 608 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 609 } 610 #if defined(PETSC_USE_CTABLE) 611 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 612 col--; 613 #else 614 col = aij->colmap[idxn[j]] - 1; 615 #endif 616 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 617 else { 618 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 619 } 620 } 621 } 622 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 623 } 624 PetscFunctionReturn(0); 625 } 626 627 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 628 629 #undef __FUNCT__ 630 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 631 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 632 { 633 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 634 PetscErrorCode ierr; 635 PetscInt nstash,reallocs; 636 InsertMode addv; 637 638 PetscFunctionBegin; 639 if (aij->donotstash || mat->nooffprocentries) 
PetscFunctionReturn(0);

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so, we must
     also disassemble ourselves, in order that we may reassemble.
*/ 694 /* 695 if nonzero structure of submatrix B cannot change then we know that 696 no processor disassembled thus we can skip this stuff 697 */ 698 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 699 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 700 if (mat->was_assembled && !other_disassembled) { 701 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 702 } 703 } 704 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 705 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 706 } 707 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 708 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 709 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 710 711 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 712 713 aij->rowvalues = 0; 714 715 /* used by MatAXPY() */ 716 a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 717 a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 718 719 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 720 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 721 722 { 723 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 724 ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 725 } 726 PetscFunctionReturn(0); 727 } 728 729 #undef __FUNCT__ 730 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 731 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 732 { 733 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 734 PetscErrorCode ierr; 735 736 PetscFunctionBegin; 737 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 738 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 739 PetscFunctionReturn(0); 740 } 741 742 #undef __FUNCT__ 743 #define __FUNCT__ "MatZeroRows_MPIAIJ" 744 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 745 { 746 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 747 PetscInt *owners = A->rmap->range; 748 PetscInt n = A->rmap->n; 749 PetscMPIInt size = mat->size; 750 PetscSF sf; 751 PetscInt *lrows; 752 PetscSFNode *rrows; 753 PetscInt lastidx = -1, r, p = 0, len = 0; 754 PetscErrorCode ierr; 755 756 PetscFunctionBegin; 757 /* Create SF where leaves are input rows and roots are owned rows */ 758 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 759 for (r = 0; r < n; ++r) lrows[r] = -1; 760 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 761 for (r = 0; r < N; ++r) { 762 const PetscInt idx = rows[r]; 763 PetscBool found = PETSC_FALSE; 764 /* Trick for efficient searching for sorted rows */ 765 if (lastidx > idx) p = 0; 766 lastidx = idx; 767 for (; p < size; ++p) { 768 if (idx >= owners[p] && idx < owners[p+1]) { 769 rrows[r].rank = p; 770 rrows[r].index = rows[r] - owners[p]; 771 found = PETSC_TRUE; 772 break; 773 } 774 } 775 if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx); 776 } 777 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 778 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 779 /* Collect flags for rows to be zeroed */ 780 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 781 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 782 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 783 /* Compress and put in row numbers */ 784 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 
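  /*
     Added commentary: after the PetscSF reduction above, lrows[r] is still -1 for local rows that
     no process asked to zero and is nonnegative for rows that were requested, and the loop above
     compresses those markers in place into a list of len local row indices. Hypothetical
     illustration: with n = 4 and local rows 1 and 3 flagged, the compressed result is
     lrows = {1, 3} with len = 2.
  */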
785 /* fix right hand side if needed */ 786 if (x && b) { 787 const PetscScalar *xx; 788 PetscScalar *bb; 789 790 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 791 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 792 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 793 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 794 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 795 } 796 /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/ 797 ierr = MatZeroRows(mat->B, len, lrows, 0.0, 0,0);CHKERRQ(ierr); 798 if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) { 799 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 800 } else if (diag != 0.0) { 801 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 802 if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 803 for (r = 0; r < len; ++r) { 804 const PetscInt row = lrows[r] + A->rmap->rstart; 805 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 806 } 807 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 808 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 809 } else { 810 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 811 } 812 ierr = PetscFree(lrows);CHKERRQ(ierr); 813 { 814 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 815 ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 816 } 817 PetscFunctionReturn(0); 818 } 819 820 #undef __FUNCT__ 821 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 822 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 823 { 824 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 825 PetscErrorCode ierr; 826 PetscMPIInt size = l->size,n = A->rmap->n,lastidx = -1; 827 PetscInt i,j,r,m,p = 0,len = 0; 828 PetscInt *lrows,*owners = A->rmap->range; 829 PetscSFNode *rrows; 830 PetscSF sf; 831 const PetscScalar *xx; 832 PetscScalar *bb,*mask; 833 Vec xmask,lmask; 834 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 835 const PetscInt *aj, *ii,*ridx; 836 PetscScalar *aa; 837 #if defined(PETSC_DEBUG) 838 PetscBool found = PETSC_FALSE; 839 #endif 840 841 PetscFunctionBegin; 842 /* Create SF where leaves are input rows and roots are owned rows */ 843 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 844 for (r = 0; r < n; ++r) lrows[r] = -1; 845 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 846 for (r = 0; r < N; ++r) { 847 const PetscInt idx = rows[r]; 848 PetscBool found = PETSC_FALSE; 849 /* Trick for efficient searching for sorted rows */ 850 if (lastidx > idx) p = 0; 851 lastidx = idx; 852 for (; p < size; ++p) { 853 if (idx >= owners[p] && idx < owners[p+1]) { 854 rrows[r].rank = p; 855 rrows[r].index = rows[r] - owners[p]; 856 found = PETSC_TRUE; 857 break; 858 } 859 } 860 if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx); 861 } 862 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 863 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 864 /* Collect flags for rows to be zeroed */ 865 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 866 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, 
lrows, MPI_LOR);CHKERRQ(ierr); 867 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 868 /* Compress and put in row numbers */ 869 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 870 /* zero diagonal part of matrix */ 871 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 872 /* handle off diagonal part of matrix */ 873 ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr); 874 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 875 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 876 for (i=0; i<len; i++) bb[lrows[i]] = 1; 877 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 878 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 879 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 880 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 881 if (x) { 882 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 883 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 884 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 885 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 886 } 887 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 888 /* remove zeroed rows of off diagonal matrix */ 889 ii = aij->i; 890 for (i=0; i<len; i++) { 891 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 892 } 893 /* loop over all elements of off process part of matrix zeroing removed columns*/ 894 if (aij->compressedrow.use) { 895 m = aij->compressedrow.nrows; 896 ii = aij->compressedrow.i; 897 ridx = aij->compressedrow.rindex; 898 for (i=0; i<m; i++) { 899 n = ii[i+1] - ii[i]; 900 aj = aij->j + ii[i]; 901 aa = aij->a + ii[i]; 902 903 for (j=0; j<n; j++) { 904 if (PetscAbsScalar(mask[*aj])) { 905 if (b) bb[*ridx] -= *aa*xx[*aj]; 906 *aa = 0.0; 907 } 908 aa++; 909 aj++; 910 } 911 ridx++; 912 } 913 } else { /* do not use compressed row format */ 914 m = l->B->rmap->n; 915 for (i=0; i<m; i++) { 916 n = ii[i+1] - ii[i]; 917 aj = aij->j + ii[i]; 918 aa = aij->a + ii[i]; 919 for (j=0; j<n; j++) { 920 if (PetscAbsScalar(mask[*aj])) { 921 if (b) bb[i] -= *aa*xx[*aj]; 922 *aa = 0.0; 923 } 924 aa++; 925 aj++; 926 } 927 } 928 } 929 if (x) { 930 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 931 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 932 } 933 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 934 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 935 ierr = PetscFree(lrows);CHKERRQ(ierr); 936 PetscFunctionReturn(0); 937 } 938 939 #undef __FUNCT__ 940 #define __FUNCT__ "MatMult_MPIAIJ" 941 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 942 { 943 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 944 PetscErrorCode ierr; 945 PetscInt nt; 946 947 PetscFunctionBegin; 948 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 949 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 950 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 951 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 952 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 953 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 954 PetscFunctionReturn(0); 955 } 956 957 #undef __FUNCT__ 958 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 959 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 960 { 961 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 962 PetscErrorCode ierr; 963 964 PetscFunctionBegin; 965 ierr = 
MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 966 PetscFunctionReturn(0); 967 } 968 969 #undef __FUNCT__ 970 #define __FUNCT__ "MatMultAdd_MPIAIJ" 971 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 972 { 973 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 974 PetscErrorCode ierr; 975 976 PetscFunctionBegin; 977 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 978 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 979 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 980 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 981 PetscFunctionReturn(0); 982 } 983 984 #undef __FUNCT__ 985 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 986 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 987 { 988 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 989 PetscErrorCode ierr; 990 PetscBool merged; 991 992 PetscFunctionBegin; 993 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 994 /* do nondiagonal part */ 995 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 996 if (!merged) { 997 /* send it on its way */ 998 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 999 /* do local part */ 1000 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1001 /* receive remote parts: note this assumes the values are not actually */ 1002 /* added in yy until the next line, */ 1003 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1004 } else { 1005 /* do local part */ 1006 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1007 /* send it on its way */ 1008 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1009 /* values actually were received in the Begin() but we need to call this nop */ 1010 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1011 } 1012 PetscFunctionReturn(0); 1013 } 1014 1015 #undef __FUNCT__ 1016 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1017 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1018 { 1019 MPI_Comm comm; 1020 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1021 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1022 IS Me,Notme; 1023 PetscErrorCode ierr; 1024 PetscInt M,N,first,last,*notme,i; 1025 PetscMPIInt size; 1026 1027 PetscFunctionBegin; 1028 /* Easy test: symmetric diagonal block */ 1029 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1030 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1031 if (!*f) PetscFunctionReturn(0); 1032 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1033 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1034 if (size == 1) PetscFunctionReturn(0); 1035 1036 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. 
*/ 1037 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1038 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1039 ierr = PetscMalloc1((N-last+first),¬me);CHKERRQ(ierr); 1040 for (i=0; i<first; i++) notme[i] = i; 1041 for (i=last; i<M; i++) notme[i-last+first] = i; 1042 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1043 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1044 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1045 Aoff = Aoffs[0]; 1046 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1047 Boff = Boffs[0]; 1048 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1049 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1050 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1051 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1052 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1053 ierr = PetscFree(notme);CHKERRQ(ierr); 1054 PetscFunctionReturn(0); 1055 } 1056 1057 #undef __FUNCT__ 1058 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1059 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1060 { 1061 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1062 PetscErrorCode ierr; 1063 1064 PetscFunctionBegin; 1065 /* do nondiagonal part */ 1066 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1067 /* send it on its way */ 1068 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1069 /* do local part */ 1070 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1071 /* receive remote parts */ 1072 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1073 PetscFunctionReturn(0); 1074 } 1075 1076 /* 1077 This only works correctly for square matrices where the subblock A->A is the 1078 diagonal block 1079 */ 1080 #undef __FUNCT__ 1081 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1082 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1083 { 1084 PetscErrorCode ierr; 1085 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1086 1087 PetscFunctionBegin; 1088 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1089 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1090 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1091 PetscFunctionReturn(0); 1092 } 1093 1094 #undef __FUNCT__ 1095 #define __FUNCT__ "MatScale_MPIAIJ" 1096 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1097 { 1098 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1099 PetscErrorCode ierr; 1100 1101 PetscFunctionBegin; 1102 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1103 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1104 PetscFunctionReturn(0); 1105 } 1106 1107 #undef __FUNCT__ 1108 #define __FUNCT__ "MatDestroy_MPIAIJ" 1109 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1110 { 1111 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1112 PetscErrorCode ierr; 1113 1114 PetscFunctionBegin; 1115 #if defined(PETSC_USE_LOG) 1116 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1117 #endif 1118 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1119 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1120 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1121 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1122 #if defined(PETSC_USE_CTABLE) 1123 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1124 #else 1125 ierr = 
PetscFree(aij->colmap);CHKERRQ(ierr); 1126 #endif 1127 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1128 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1129 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1130 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1131 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1132 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1133 1134 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1135 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1136 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1137 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 1138 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1139 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1140 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1141 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1142 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1143 PetscFunctionReturn(0); 1144 } 1145 1146 #undef __FUNCT__ 1147 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1148 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1149 { 1150 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1151 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1152 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1153 PetscErrorCode ierr; 1154 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1155 int fd; 1156 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1157 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1158 PetscScalar *column_values; 1159 PetscInt message_count,flowcontrolcount; 1160 FILE *file; 1161 1162 PetscFunctionBegin; 1163 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1164 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1165 nz = A->nz + B->nz; 1166 if (!rank) { 1167 header[0] = MAT_FILE_CLASSID; 1168 header[1] = mat->rmap->N; 1169 header[2] = mat->cmap->N; 1170 1171 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1172 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1173 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1174 /* get largest number of rows any processor has */ 1175 rlen = mat->rmap->n; 1176 range = mat->rmap->range; 1177 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1178 } else { 1179 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1180 rlen = mat->rmap->n; 1181 } 1182 1183 /* load up the local row counts */ 1184 ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr); 1185 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1186 1187 /* store the row lengths to the file */ 1188 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1189 if (!rank) { 1190 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1191 for (i=1; i<size; i++) { 1192 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1193 rlen = range[i+1] - range[i]; 1194 ierr = 
MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* the 0th processor needs as much space as the processor with the most nonzeros */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
    for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
    for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr =
PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1259 for (i=1; i<size; i++) { 1260 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1261 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1262 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1263 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1264 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1265 } 1266 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1267 } else { 1268 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1269 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1270 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1271 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1272 } 1273 ierr = PetscFree(column_values);CHKERRQ(ierr); 1274 1275 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1276 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1277 PetscFunctionReturn(0); 1278 } 1279 1280 #include <petscdraw.h> 1281 #undef __FUNCT__ 1282 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1283 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1284 { 1285 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1286 PetscErrorCode ierr; 1287 PetscMPIInt rank = aij->rank,size = aij->size; 1288 PetscBool isdraw,iascii,isbinary; 1289 PetscViewer sviewer; 1290 PetscViewerFormat format; 1291 1292 PetscFunctionBegin; 1293 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1294 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1295 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1296 if (iascii) { 1297 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1298 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1299 MatInfo info; 1300 PetscBool inodes; 1301 1302 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1303 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1304 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1305 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1306 if (!inodes) { 1307 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1308 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1309 } else { 1310 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1311 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1312 } 1313 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1314 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1315 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1316 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1317 ierr = 
PetscViewerFlush(viewer);CHKERRQ(ierr); 1318 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1319 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1320 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1321 PetscFunctionReturn(0); 1322 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1323 PetscInt inodecount,inodelimit,*inodes; 1324 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1325 if (inodes) { 1326 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1327 } else { 1328 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1329 } 1330 PetscFunctionReturn(0); 1331 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1332 PetscFunctionReturn(0); 1333 } 1334 } else if (isbinary) { 1335 if (size == 1) { 1336 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1337 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1338 } else { 1339 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1340 } 1341 PetscFunctionReturn(0); 1342 } else if (isdraw) { 1343 PetscDraw draw; 1344 PetscBool isnull; 1345 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1346 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1347 } 1348 1349 if (size == 1) { 1350 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1351 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1352 } else { 1353 /* assemble the entire matrix onto first processor. */ 1354 Mat A; 1355 Mat_SeqAIJ *Aloc; 1356 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1357 MatScalar *a; 1358 1359 if (mat->rmap->N > 1024) { 1360 PetscBool flg = PETSC_FALSE; 1361 1362 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr); 1363 if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1364 } 1365 1366 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1367 if (!rank) { 1368 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1369 } else { 1370 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1371 } 1372 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1373 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1374 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1375 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1376 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1377 1378 /* copy over the A part */ 1379 Aloc = (Mat_SeqAIJ*)aij->A->data; 1380 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1381 row = mat->rmap->rstart; 1382 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1383 for (i=0; i<m; i++) { 1384 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1385 row++; 1386 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1387 } 1388 aj = Aloc->j; 1389 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1390 1391 /* copy over the B part */ 1392 Aloc = (Mat_SeqAIJ*)aij->B->data; 1393 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1394 row = mat->rmap->rstart; 1395 
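  /*
     Added commentary: the off-diagonal block B stores its column indices in a compacted local
     numbering, so below they are translated back to global column numbers through aij->garray
     before being passed to MatSetValues(). Hypothetical illustration: with garray = {3, 9, 17},
     a local index aj[i] = 1 corresponds to global column 9.
  */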
ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr); 1396 ct = cols; 1397 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1398 for (i=0; i<m; i++) { 1399 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1400 row++; 1401 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1402 } 1403 ierr = PetscFree(ct);CHKERRQ(ierr); 1404 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1405 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1406 /* 1407 Everyone has to call to draw the matrix since the graphics waits are 1408 synchronized across all processors that share the PetscDraw object 1409 */ 1410 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1411 if (!rank) { 1412 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1413 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1414 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1415 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1416 } 1417 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1418 ierr = MatDestroy(&A);CHKERRQ(ierr); 1419 } 1420 PetscFunctionReturn(0); 1421 } 1422 1423 #undef __FUNCT__ 1424 #define __FUNCT__ "MatView_MPIAIJ" 1425 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1426 { 1427 PetscErrorCode ierr; 1428 PetscBool iascii,isdraw,issocket,isbinary; 1429 1430 PetscFunctionBegin; 1431 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1432 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1433 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1434 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1435 if (iascii || isdraw || isbinary || issocket) { 1436 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1437 } 1438 PetscFunctionReturn(0); 1439 } 1440 1441 #undef __FUNCT__ 1442 #define __FUNCT__ "MatSOR_MPIAIJ" 1443 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1444 { 1445 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1446 PetscErrorCode ierr; 1447 Vec bb1 = 0; 1448 PetscBool hasop; 1449 1450 PetscFunctionBegin; 1451 if (flag == SOR_APPLY_UPPER) { 1452 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1453 PetscFunctionReturn(0); 1454 } 1455 1456 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1457 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1458 } 1459 1460 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1461 if (flag & SOR_ZERO_INITIAL_GUESS) { 1462 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1463 its--; 1464 } 1465 1466 while (its--) { 1467 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1468 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1469 1470 /* update rhs: bb1 = bb - B*x */ 1471 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1472 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1473 1474 /* local sweep */ 1475 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1476 } 1477 } else if (flag & 
SOR_LOCAL_FORWARD_SWEEP) { 1478 if (flag & SOR_ZERO_INITIAL_GUESS) { 1479 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1480 its--; 1481 } 1482 while (its--) { 1483 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1484 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1485 1486 /* update rhs: bb1 = bb - B*x */ 1487 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1488 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1489 1490 /* local sweep */ 1491 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1492 } 1493 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1494 if (flag & SOR_ZERO_INITIAL_GUESS) { 1495 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1496 its--; 1497 } 1498 while (its--) { 1499 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1500 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1501 1502 /* update rhs: bb1 = bb - B*x */ 1503 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1504 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1505 1506 /* local sweep */ 1507 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1508 } 1509 } else if (flag & SOR_EISENSTAT) { 1510 Vec xx1; 1511 1512 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1513 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1514 1515 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1516 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1517 if (!mat->diag) { 1518 ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1519 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1520 } 1521 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1522 if (hasop) { 1523 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1524 } else { 1525 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1526 } 1527 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1528 1529 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1530 1531 /* local sweep */ 1532 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1533 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1534 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1535 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1536 1537 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1538 PetscFunctionReturn(0); 1539 } 1540 1541 #undef __FUNCT__ 1542 #define __FUNCT__ "MatPermute_MPIAIJ" 1543 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1544 { 1545 Mat aA,aB,Aperm; 1546 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1547 PetscScalar *aa,*ba; 1548 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1549 PetscSF rowsf,sf; 1550 IS parcolp = NULL; 1551 PetscBool done; 1552 PetscErrorCode ierr; 1553 1554 PetscFunctionBegin; 1555 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1556 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1557 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1558 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1559 1560 
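  /*
     Overall flow of the permutation:
       1) invert the row and column permutations with PetscSF reductions so each process
          learns the destination global index of every row/column it owns;
       2) broadcast the destination indices of the ghost columns (gcols) so the
          off-diagonal block can be renumbered as well;
       3) count diagonal/off-diagonal nonzeros per destination row and send the counts to
          the owners of the destination rows for preallocation;
       4) insert the permuted values with MatSetValues() and assemble.

     Illustrative call site (not part of this file), assuming rowp/colp come from,
     e.g., MatGetOrdering():
         ierr = MatPermute(A,rowp,colp,&B);CHKERRQ(ierr);
  */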
/* Invert row permutation to find out where my rows should go */ 1561 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1562 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1563 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1564 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1565 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1566 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1567 1568 /* Invert column permutation to find out where my columns should go */ 1569 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1570 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1571 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1572 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1573 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1574 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1575 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1576 1577 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1578 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1579 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1580 1581 /* Find out where my gcols should go */ 1582 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1583 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1584 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1585 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1586 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1587 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1588 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1589 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1590 1591 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1592 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1593 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1594 for (i=0; i<m; i++) { 1595 PetscInt row = rdest[i],rowner; 1596 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1597 for (j=ai[i]; j<ai[i+1]; j++) { 1598 PetscInt cowner,col = cdest[aj[j]]; 1599 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1600 if (rowner == cowner) dnnz[i]++; 1601 else onnz[i]++; 1602 } 1603 for (j=bi[i]; j<bi[i+1]; j++) { 1604 PetscInt cowner,col = gcdest[bj[j]]; 1605 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1606 if (rowner == cowner) dnnz[i]++; 1607 else onnz[i]++; 1608 } 1609 } 1610 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1611 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1612 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1613 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1614 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1615 1616 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1617 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1618 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1619 for (i=0; i<m; i++) { 1620 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1621 PetscInt j0,rowlen; 1622 rowlen = ai[i+1] - ai[i]; 1623 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be 
larger than number of rows m, so sum in batches */ 1624 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1625 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1626 } 1627 rowlen = bi[i+1] - bi[i]; 1628 for (j0=j=0; j<rowlen; j0=j) { 1629 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1630 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1631 } 1632 } 1633 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1634 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1635 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1636 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1637 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1638 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1639 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1640 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1641 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1642 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1643 *B = Aperm; 1644 PetscFunctionReturn(0); 1645 } 1646 1647 #undef __FUNCT__ 1648 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1649 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1650 { 1651 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1652 Mat A = mat->A,B = mat->B; 1653 PetscErrorCode ierr; 1654 PetscReal isend[5],irecv[5]; 1655 1656 PetscFunctionBegin; 1657 info->block_size = 1.0; 1658 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1659 1660 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1661 isend[3] = info->memory; isend[4] = info->mallocs; 1662 1663 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1664 1665 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1666 isend[3] += info->memory; isend[4] += info->mallocs; 1667 if (flag == MAT_LOCAL) { 1668 info->nz_used = isend[0]; 1669 info->nz_allocated = isend[1]; 1670 info->nz_unneeded = isend[2]; 1671 info->memory = isend[3]; 1672 info->mallocs = isend[4]; 1673 } else if (flag == MAT_GLOBAL_MAX) { 1674 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1675 1676 info->nz_used = irecv[0]; 1677 info->nz_allocated = irecv[1]; 1678 info->nz_unneeded = irecv[2]; 1679 info->memory = irecv[3]; 1680 info->mallocs = irecv[4]; 1681 } else if (flag == MAT_GLOBAL_SUM) { 1682 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1683 1684 info->nz_used = irecv[0]; 1685 info->nz_allocated = irecv[1]; 1686 info->nz_unneeded = irecv[2]; 1687 info->memory = irecv[3]; 1688 info->mallocs = irecv[4]; 1689 } 1690 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1691 info->fill_ratio_needed = 0; 1692 info->factor_mallocs = 0; 1693 PetscFunctionReturn(0); 1694 } 1695 1696 #undef __FUNCT__ 1697 #define __FUNCT__ "MatSetOption_MPIAIJ" 1698 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1699 { 1700 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1701 PetscErrorCode ierr; 1702 1703 PetscFunctionBegin; 1704 switch (op) { 1705 case MAT_NEW_NONZERO_LOCATIONS: 1706 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1707 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1708 case MAT_KEEP_NONZERO_PATTERN: 1709 case MAT_NEW_NONZERO_LOCATION_ERR: 1710 case MAT_USE_INODES: 1711 case MAT_IGNORE_ZERO_ENTRIES: 1712 MatCheckPreallocated(A,1); 
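    /* these options pertain to the storage of the local blocks, so forward them to both
       the diagonal (A) and off-diagonal (B) sequential matrices */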
1713 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1714 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1715 break; 1716 case MAT_ROW_ORIENTED: 1717 a->roworiented = flg; 1718 1719 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1720 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1721 break; 1722 case MAT_NEW_DIAGONALS: 1723 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1724 break; 1725 case MAT_IGNORE_OFF_PROC_ENTRIES: 1726 a->donotstash = flg; 1727 break; 1728 case MAT_SPD: 1729 A->spd_set = PETSC_TRUE; 1730 A->spd = flg; 1731 if (flg) { 1732 A->symmetric = PETSC_TRUE; 1733 A->structurally_symmetric = PETSC_TRUE; 1734 A->symmetric_set = PETSC_TRUE; 1735 A->structurally_symmetric_set = PETSC_TRUE; 1736 } 1737 break; 1738 case MAT_SYMMETRIC: 1739 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1740 break; 1741 case MAT_STRUCTURALLY_SYMMETRIC: 1742 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1743 break; 1744 case MAT_HERMITIAN: 1745 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1746 break; 1747 case MAT_SYMMETRY_ETERNAL: 1748 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1749 break; 1750 default: 1751 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1752 } 1753 PetscFunctionReturn(0); 1754 } 1755 1756 #undef __FUNCT__ 1757 #define __FUNCT__ "MatGetRow_MPIAIJ" 1758 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1759 { 1760 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1761 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1762 PetscErrorCode ierr; 1763 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1764 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1765 PetscInt *cmap,*idx_p; 1766 1767 PetscFunctionBegin; 1768 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1769 mat->getrowactive = PETSC_TRUE; 1770 1771 if (!mat->rowvalues && (idx || v)) { 1772 /* 1773 allocate enough space to hold information from the longest row. 
1774 */ 1775 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1776 PetscInt max = 1,tmp; 1777 for (i=0; i<matin->rmap->n; i++) { 1778 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1779 if (max < tmp) max = tmp; 1780 } 1781 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1782 } 1783 1784 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1785 lrow = row - rstart; 1786 1787 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1788 if (!v) {pvA = 0; pvB = 0;} 1789 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1790 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1791 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1792 nztot = nzA + nzB; 1793 1794 cmap = mat->garray; 1795 if (v || idx) { 1796 if (nztot) { 1797 /* Sort by increasing column numbers, assuming A and B already sorted */ 1798 PetscInt imark = -1; 1799 if (v) { 1800 *v = v_p = mat->rowvalues; 1801 for (i=0; i<nzB; i++) { 1802 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1803 else break; 1804 } 1805 imark = i; 1806 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1807 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1808 } 1809 if (idx) { 1810 *idx = idx_p = mat->rowindices; 1811 if (imark > -1) { 1812 for (i=0; i<imark; i++) { 1813 idx_p[i] = cmap[cworkB[i]]; 1814 } 1815 } else { 1816 for (i=0; i<nzB; i++) { 1817 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1818 else break; 1819 } 1820 imark = i; 1821 } 1822 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1823 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1824 } 1825 } else { 1826 if (idx) *idx = 0; 1827 if (v) *v = 0; 1828 } 1829 } 1830 *nz = nztot; 1831 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1832 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1833 PetscFunctionReturn(0); 1834 } 1835 1836 #undef __FUNCT__ 1837 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1838 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1839 { 1840 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1841 1842 PetscFunctionBegin; 1843 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1844 aij->getrowactive = PETSC_FALSE; 1845 PetscFunctionReturn(0); 1846 } 1847 1848 #undef __FUNCT__ 1849 #define __FUNCT__ "MatNorm_MPIAIJ" 1850 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1851 { 1852 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1853 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1854 PetscErrorCode ierr; 1855 PetscInt i,j,cstart = mat->cmap->rstart; 1856 PetscReal sum = 0.0; 1857 MatScalar *v; 1858 1859 PetscFunctionBegin; 1860 if (aij->size == 1) { 1861 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1862 } else { 1863 if (type == NORM_FROBENIUS) { 1864 v = amat->a; 1865 for (i=0; i<amat->nz; i++) { 1866 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1867 } 1868 v = bmat->a; 1869 for (i=0; i<bmat->nz; i++) { 1870 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1871 } 1872 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1873 *norm = PetscSqrtReal(*norm); 1874 } else if (type == NORM_1) { /* max column norm */ 1875 PetscReal *tmp,*tmp2; 1876 PetscInt *jj,*garray = aij->garray; 1877 ierr = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr); 1878 ierr = 
PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr); 1879 *norm = 0.0; 1880 v = amat->a; jj = amat->j; 1881 for (j=0; j<amat->nz; j++) { 1882 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1883 } 1884 v = bmat->a; jj = bmat->j; 1885 for (j=0; j<bmat->nz; j++) { 1886 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1887 } 1888 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1889 for (j=0; j<mat->cmap->N; j++) { 1890 if (tmp2[j] > *norm) *norm = tmp2[j]; 1891 } 1892 ierr = PetscFree(tmp);CHKERRQ(ierr); 1893 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1894 } else if (type == NORM_INFINITY) { /* max row norm */ 1895 PetscReal ntemp = 0.0; 1896 for (j=0; j<aij->A->rmap->n; j++) { 1897 v = amat->a + amat->i[j]; 1898 sum = 0.0; 1899 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1900 sum += PetscAbsScalar(*v); v++; 1901 } 1902 v = bmat->a + bmat->i[j]; 1903 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1904 sum += PetscAbsScalar(*v); v++; 1905 } 1906 if (sum > ntemp) ntemp = sum; 1907 } 1908 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1909 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1910 } 1911 PetscFunctionReturn(0); 1912 } 1913 1914 #undef __FUNCT__ 1915 #define __FUNCT__ "MatTranspose_MPIAIJ" 1916 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1917 { 1918 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1919 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1920 PetscErrorCode ierr; 1921 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1922 PetscInt cstart = A->cmap->rstart,ncol; 1923 Mat B; 1924 MatScalar *array; 1925 1926 PetscFunctionBegin; 1927 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1928 1929 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1930 ai = Aloc->i; aj = Aloc->j; 1931 bi = Bloc->i; bj = Bloc->j; 1932 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1933 PetscInt *d_nnz,*g_nnz,*o_nnz; 1934 PetscSFNode *oloc; 1935 PETSC_UNUSED PetscSF sf; 1936 1937 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1938 /* compute d_nnz for preallocation */ 1939 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1940 for (i=0; i<ai[ma]; i++) { 1941 d_nnz[aj[i]]++; 1942 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1943 } 1944 /* compute local off-diagonal contributions */ 1945 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1946 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1947 /* map those to global */ 1948 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1949 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1950 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1951 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1952 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1953 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1954 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1955 1956 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1957 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1958 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1959 ierr = 
MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1960 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1961 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 1962 } else { 1963 B = *matout; 1964 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1965 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1966 } 1967 1968 /* copy over the A part */ 1969 array = Aloc->a; 1970 row = A->rmap->rstart; 1971 for (i=0; i<ma; i++) { 1972 ncol = ai[i+1]-ai[i]; 1973 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1974 row++; 1975 array += ncol; aj += ncol; 1976 } 1977 aj = Aloc->j; 1978 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1979 1980 /* copy over the B part */ 1981 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 1982 array = Bloc->a; 1983 row = A->rmap->rstart; 1984 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1985 cols_tmp = cols; 1986 for (i=0; i<mb; i++) { 1987 ncol = bi[i+1]-bi[i]; 1988 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1989 row++; 1990 array += ncol; cols_tmp += ncol; 1991 } 1992 ierr = PetscFree(cols);CHKERRQ(ierr); 1993 1994 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1995 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1996 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 1997 *matout = B; 1998 } else { 1999 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2000 } 2001 PetscFunctionReturn(0); 2002 } 2003 2004 #undef __FUNCT__ 2005 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2006 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2007 { 2008 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2009 Mat a = aij->A,b = aij->B; 2010 PetscErrorCode ierr; 2011 PetscInt s1,s2,s3; 2012 2013 PetscFunctionBegin; 2014 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2015 if (rr) { 2016 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2017 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2018 /* Overlap communication with computation. 
*/ 2019 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2020 } 2021 if (ll) { 2022 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2023 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2024 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2025 } 2026 /* scale the diagonal block */ 2027 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2028 2029 if (rr) { 2030 /* Do a scatter end and then right scale the off-diagonal block */ 2031 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2032 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2033 } 2034 PetscFunctionReturn(0); 2035 } 2036 2037 #undef __FUNCT__ 2038 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2039 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2040 { 2041 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2042 PetscErrorCode ierr; 2043 2044 PetscFunctionBegin; 2045 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2046 PetscFunctionReturn(0); 2047 } 2048 2049 #undef __FUNCT__ 2050 #define __FUNCT__ "MatEqual_MPIAIJ" 2051 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2052 { 2053 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2054 Mat a,b,c,d; 2055 PetscBool flg; 2056 PetscErrorCode ierr; 2057 2058 PetscFunctionBegin; 2059 a = matA->A; b = matA->B; 2060 c = matB->A; d = matB->B; 2061 2062 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2063 if (flg) { 2064 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2065 } 2066 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2067 PetscFunctionReturn(0); 2068 } 2069 2070 #undef __FUNCT__ 2071 #define __FUNCT__ "MatCopy_MPIAIJ" 2072 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2073 { 2074 PetscErrorCode ierr; 2075 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2076 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2077 2078 PetscFunctionBegin; 2079 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2080 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2081 /* because of the column compression in the off-processor part of the matrix a->B, 2082 the number of columns in a->B and b->B may be different, hence we cannot call 2083 the MatCopy() directly on the two parts. 
If need be, we can provide a more 2084 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2085 then copying the submatrices */ 2086 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2087 } else { 2088 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2089 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2090 } 2091 PetscFunctionReturn(0); 2092 } 2093 2094 #undef __FUNCT__ 2095 #define __FUNCT__ "MatSetUp_MPIAIJ" 2096 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2097 { 2098 PetscErrorCode ierr; 2099 2100 PetscFunctionBegin; 2101 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2102 PetscFunctionReturn(0); 2103 } 2104 2105 #undef __FUNCT__ 2106 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2107 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2108 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2109 { 2110 PetscInt i,m=Y->rmap->N; 2111 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2112 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2113 const PetscInt *xi = x->i,*yi = y->i; 2114 2115 PetscFunctionBegin; 2116 /* Set the number of nonzeros in the new matrix */ 2117 for (i=0; i<m; i++) { 2118 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2119 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2120 nnz[i] = 0; 2121 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2122 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2123 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2124 nnz[i]++; 2125 } 2126 for (; k<nzy; k++) nnz[i]++; 2127 } 2128 PetscFunctionReturn(0); 2129 } 2130 2131 #undef __FUNCT__ 2132 #define __FUNCT__ "MatAXPY_MPIAIJ" 2133 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2134 { 2135 PetscErrorCode ierr; 2136 PetscInt i; 2137 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2138 PetscBLASInt bnz,one=1; 2139 Mat_SeqAIJ *x,*y; 2140 2141 PetscFunctionBegin; 2142 if (str == SAME_NONZERO_PATTERN) { 2143 PetscScalar alpha = a; 2144 x = (Mat_SeqAIJ*)xx->A->data; 2145 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2146 y = (Mat_SeqAIJ*)yy->A->data; 2147 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2148 x = (Mat_SeqAIJ*)xx->B->data; 2149 y = (Mat_SeqAIJ*)yy->B->data; 2150 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2151 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2152 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2153 } else if (str == SUBSET_NONZERO_PATTERN) { 2154 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2155 2156 x = (Mat_SeqAIJ*)xx->B->data; 2157 y = (Mat_SeqAIJ*)yy->B->data; 2158 if (y->xtoy && y->XtoY != xx->B) { 2159 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2160 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2161 } 2162 if (!y->xtoy) { /* get xtoy */ 2163 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2164 y->XtoY = xx->B; 2165 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2166 } 2167 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2168 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2169 } else { 2170 Mat B; 2171 PetscInt *nnz_d,*nnz_o; 2172 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2173 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2174 ierr = 
MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2175 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2176 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2177 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2178 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2179 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2180 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2181 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2182 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2183 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2184 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2185 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2186 } 2187 PetscFunctionReturn(0); 2188 } 2189 2190 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2191 2192 #undef __FUNCT__ 2193 #define __FUNCT__ "MatConjugate_MPIAIJ" 2194 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2195 { 2196 #if defined(PETSC_USE_COMPLEX) 2197 PetscErrorCode ierr; 2198 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2199 2200 PetscFunctionBegin; 2201 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2202 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2203 #else 2204 PetscFunctionBegin; 2205 #endif 2206 PetscFunctionReturn(0); 2207 } 2208 2209 #undef __FUNCT__ 2210 #define __FUNCT__ "MatRealPart_MPIAIJ" 2211 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2212 { 2213 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2214 PetscErrorCode ierr; 2215 2216 PetscFunctionBegin; 2217 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2218 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2219 PetscFunctionReturn(0); 2220 } 2221 2222 #undef __FUNCT__ 2223 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2224 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2225 { 2226 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2227 PetscErrorCode ierr; 2228 2229 PetscFunctionBegin; 2230 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2231 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2232 PetscFunctionReturn(0); 2233 } 2234 2235 #if defined(PETSC_HAVE_PBGL) 2236 2237 #include <boost/parallel/mpi/bsp_process_group.hpp> 2238 #include <boost/graph/distributed/ilu_default_graph.hpp> 2239 #include <boost/graph/distributed/ilu_0_block.hpp> 2240 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2241 #include <boost/graph/distributed/petsc/interface.hpp> 2242 #include <boost/multi_array.hpp> 2243 #include <boost/parallel/distributed_property_map->hpp> 2244 2245 #undef __FUNCT__ 2246 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2247 /* 2248 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2249 */ 2250 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2251 { 2252 namespace petsc = boost::distributed::petsc; 2253 2254 namespace graph_dist = boost::graph::distributed; 2255 using boost::graph::distributed::ilu_default::process_group_type; 2256 using boost::graph::ilu_permuted; 2257 2258 PetscBool row_identity, col_identity; 2259 PetscContainer c; 2260 PetscInt m, n, M, N; 2261 PetscErrorCode ierr; 2262 2263 PetscFunctionBegin; 2264 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2265 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2266 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2267 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be 
identity for parallel ILU"); 2268 2269 process_group_type pg; 2270 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2271 lgraph_type *lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2272 lgraph_type& level_graph = *lgraph_p; 2273 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2274 2275 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2276 ilu_permuted(level_graph); 2277 2278 /* put together the new matrix */ 2279 ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr); 2280 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2281 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2282 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2283 ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr); 2284 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2285 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2286 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2287 2288 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c); 2289 ierr = PetscContainerSetPointer(c, lgraph_p); 2290 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2291 ierr = PetscContainerDestroy(&c); 2292 PetscFunctionReturn(0); 2293 } 2294 2295 #undef __FUNCT__ 2296 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2297 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2298 { 2299 PetscFunctionBegin; 2300 PetscFunctionReturn(0); 2301 } 2302 2303 #undef __FUNCT__ 2304 #define __FUNCT__ "MatSolve_MPIAIJ" 2305 /* 2306 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2307 */ 2308 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2309 { 2310 namespace graph_dist = boost::graph::distributed; 2311 2312 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2313 lgraph_type *lgraph_p; 2314 PetscContainer c; 2315 PetscErrorCode ierr; 2316 2317 PetscFunctionBegin; 2318 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr); 2319 ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr); 2320 ierr = VecCopy(b, x);CHKERRQ(ierr); 2321 2322 PetscScalar *array_x; 2323 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2324 PetscInt sx; 2325 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2326 2327 PetscScalar *array_b; 2328 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2329 PetscInt sb; 2330 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2331 2332 lgraph_type& level_graph = *lgraph_p; 2333 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2334 2335 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2336 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]); 2337 array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]); 2338 2339 typedef boost::iterator_property_map<array_ref_type::iterator, 2340 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2341 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)); 2342 gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2343 2344 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2345 PetscFunctionReturn(0); 2346 } 2347 #endif 2348 2349 #undef __FUNCT__ 2350 #define __FUNCT__ "MatDestroy_MatRedundant" 2351 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2352 { 2353 PetscErrorCode ierr; 2354 Mat_Redundant *redund; 2355 PetscInt i; 2356 PetscMPIInt size; 2357 2358 PetscFunctionBegin; 2359 ierr = 
MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2360 if (size == 1) { 2361 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2362 redund = a->redundant; 2363 } else { 2364 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2365 redund = a->redundant; 2366 } 2367 if (redund){ 2368 if (redund->matseq) { /* via MatGetSubMatrices() */ 2369 ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr); 2370 ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr); 2371 ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr); 2372 ierr = PetscFree(redund->matseq);CHKERRQ(ierr); 2373 } else { 2374 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2375 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2376 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2377 for (i=0; i<redund->nrecvs; i++) { 2378 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2379 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2380 } 2381 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2382 } 2383 2384 if (redund->psubcomm) { 2385 ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr); 2386 } 2387 ierr = redund->Destroy(A);CHKERRQ(ierr); 2388 ierr = PetscFree(redund);CHKERRQ(ierr); 2389 } 2390 PetscFunctionReturn(0); 2391 } 2392 2393 #undef __FUNCT__ 2394 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced" 2395 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2396 { 2397 PetscMPIInt rank,size; 2398 MPI_Comm comm; 2399 PetscErrorCode ierr; 2400 PetscInt nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N; 2401 PetscMPIInt *send_rank= NULL,*recv_rank=NULL,subrank,subsize; 2402 PetscInt *rowrange = mat->rmap->range; 2403 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2404 Mat A = aij->A,B=aij->B,C=*matredundant; 2405 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2406 PetscScalar *sbuf_a; 2407 PetscInt nzlocal=a->nz+b->nz; 2408 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2409 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray; 2410 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2411 MatScalar *aworkA,*aworkB; 2412 PetscScalar *vals; 2413 PetscMPIInt tag1,tag2,tag3,imdex; 2414 MPI_Request *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL; 2415 MPI_Request *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL; 2416 MPI_Status recv_status,*send_status; 2417 PetscInt *sbuf_nz=NULL,*rbuf_nz=NULL,count; 2418 PetscInt **rbuf_j=NULL; 2419 PetscScalar **rbuf_a=NULL; 2420 Mat_Redundant *redund =NULL; 2421 2422 PetscFunctionBegin; 2423 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2424 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2425 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2426 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2427 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2428 2429 if (reuse == MAT_REUSE_MATRIX) { 2430 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2431 if (subsize == 1) { 2432 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2433 redund = c->redundant; 2434 } else { 2435 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2436 redund = c->redundant; 2437 } 2438 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong nzlocal"); 2439 2440 nsends = redund->nsends; 2441 nrecvs = redund->nrecvs; 2442 send_rank = redund->send_rank; 2443 recv_rank = redund->recv_rank; 2444 sbuf_nz = redund->sbuf_nz; 2445 rbuf_nz = redund->rbuf_nz; 2446 sbuf_j = redund->sbuf_j; 2447 sbuf_a = redund->sbuf_a; 2448 rbuf_j = redund->rbuf_j; 2449 rbuf_a = redund->rbuf_a; 2450 } 2451 2452 if (reuse == MAT_INITIAL_MATRIX) { 2453 PetscInt nleftover,np_subcomm; 2454 2455 /* get the destination processors' id send_rank, nsends and nrecvs */ 2456 ierr = PetscMalloc2(size,&send_rank,size,&recv_rank);CHKERRQ(ierr); 2457 2458 np_subcomm = size/nsubcomm; 2459 nleftover = size - nsubcomm*np_subcomm; 2460 2461 /* block of codes below is specific for INTERLACED */ 2462 /* ------------------------------------------------*/ 2463 nsends = 0; nrecvs = 0; 2464 for (i=0; i<size; i++) { 2465 if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */ 2466 send_rank[nsends++] = i; 2467 recv_rank[nrecvs++] = i; 2468 } 2469 } 2470 if (rank >= size - nleftover) { /* this proc is a leftover processor */ 2471 i = size-nleftover-1; 2472 j = 0; 2473 while (j < nsubcomm - nleftover) { 2474 send_rank[nsends++] = i; 2475 i--; j++; 2476 } 2477 } 2478 2479 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 2480 for (i=0; i<nleftover; i++) { 2481 recv_rank[nrecvs++] = size-nleftover+i; 2482 } 2483 } 2484 /*----------------------------------------------*/ 2485 2486 /* allocate sbuf_j, sbuf_a */ 2487 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2488 ierr = PetscMalloc1(i,&sbuf_j);CHKERRQ(ierr); 2489 ierr = PetscMalloc1((nzlocal+1),&sbuf_a);CHKERRQ(ierr); 2490 /* 2491 ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr); 2492 ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr); 2493 */ 2494 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2495 2496 /* copy mat's local entries into the buffers */ 2497 if (reuse == MAT_INITIAL_MATRIX) { 2498 rownz_max = 0; 2499 rptr = sbuf_j; 2500 cols = sbuf_j + rend-rstart + 1; 2501 vals = sbuf_a; 2502 rptr[0] = 0; 2503 for (i=0; i<rend-rstart; i++) { 2504 row = i + rstart; 2505 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2506 ncols = nzA + nzB; 2507 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2508 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2509 /* load the column indices for this row into cols */ 2510 lwrite = 0; 2511 for (l=0; l<nzB; l++) { 2512 if ((ctmp = bmap[cworkB[l]]) < cstart) { 2513 vals[lwrite] = aworkB[l]; 2514 cols[lwrite++] = ctmp; 2515 } 2516 } 2517 for (l=0; l<nzA; l++) { 2518 vals[lwrite] = aworkA[l]; 2519 cols[lwrite++] = cstart + cworkA[l]; 2520 } 2521 for (l=0; l<nzB; l++) { 2522 if ((ctmp = bmap[cworkB[l]]) >= cend) { 2523 vals[lwrite] = aworkB[l]; 2524 cols[lwrite++] = ctmp; 2525 } 2526 } 2527 vals += ncols; 2528 cols += ncols; 2529 rptr[i+1] = rptr[i] + ncols; 2530 if (rownz_max < ncols) rownz_max = ncols; 2531 } 2532 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2533 } else { /* only copy matrix values into sbuf_a */ 2534 rptr = sbuf_j; 2535 vals = sbuf_a; 2536 rptr[0] = 0; 2537 for (i=0; i<rend-rstart; i++) { 2538 row = i + rstart; 2539 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2540 ncols = nzA + nzB; 2541 cworkB = b->j + b->i[i]; 2542 aworkA = a->a + a->i[i]; 2543 aworkB = b->a + b->i[i]; 2544 lwrite = 0; 2545 for (l=0; 
l<nzB; l++) { 2546 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2547 } 2548 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2549 for (l=0; l<nzB; l++) { 2550 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2551 } 2552 vals += ncols; 2553 rptr[i+1] = rptr[i] + ncols; 2554 } 2555 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2556 2557 /* send nzlocal to others, and recv other's nzlocal */ 2558 /*--------------------------------------------------*/ 2559 if (reuse == MAT_INITIAL_MATRIX) { 2560 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr); 2561 2562 s_waits2 = s_waits3 + nsends; 2563 s_waits1 = s_waits2 + nsends; 2564 r_waits1 = s_waits1 + nsends; 2565 r_waits2 = r_waits1 + nrecvs; 2566 r_waits3 = r_waits2 + nrecvs; 2567 } else { 2568 ierr = PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr); 2569 2570 r_waits3 = s_waits3 + nsends; 2571 } 2572 2573 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2574 if (reuse == MAT_INITIAL_MATRIX) { 2575 /* get new tags to keep the communication clean */ 2576 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2577 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2578 ierr = PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);CHKERRQ(ierr); 2579 2580 /* post receives of other's nzlocal */ 2581 for (i=0; i<nrecvs; i++) { 2582 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2583 } 2584 /* send nzlocal to others */ 2585 for (i=0; i<nsends; i++) { 2586 sbuf_nz[i] = nzlocal; 2587 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2588 } 2589 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2590 count = nrecvs; 2591 while (count) { 2592 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2593 2594 recv_rank[imdex] = recv_status.MPI_SOURCE; 2595 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2596 ierr = PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);CHKERRQ(ierr); 2597 2598 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2599 2600 rbuf_nz[imdex] += i + 2; 2601 2602 ierr = PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);CHKERRQ(ierr); 2603 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2604 count--; 2605 } 2606 /* wait on sends of nzlocal */ 2607 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2608 /* send mat->i,j to others, and recv from other's */ 2609 /*------------------------------------------------*/ 2610 for (i=0; i<nsends; i++) { 2611 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2612 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2613 } 2614 /* wait on receives of mat->i,j */ 2615 /*------------------------------*/ 2616 count = nrecvs; 2617 while (count) { 2618 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2619 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2620 count--; 2621 } 2622 /* wait on sends of mat->i,j */ 2623 /*---------------------------*/ 2624 if (nsends) { 2625 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2626 } 2627 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2628 2629 /* post 
receives, send and receive mat->a */ 2630 /*----------------------------------------*/ 2631 for (imdex=0; imdex<nrecvs; imdex++) { 2632 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2633 } 2634 for (i=0; i<nsends; i++) { 2635 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2636 } 2637 count = nrecvs; 2638 while (count) { 2639 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2640 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2641 count--; 2642 } 2643 if (nsends) { 2644 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2645 } 2646 2647 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2648 2649 /* create redundant matrix */ 2650 /*-------------------------*/ 2651 if (reuse == MAT_INITIAL_MATRIX) { 2652 const PetscInt *range; 2653 PetscInt rstart_sub,rend_sub,mloc_sub; 2654 2655 /* compute rownz_max for preallocation */ 2656 for (imdex=0; imdex<nrecvs; imdex++) { 2657 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2658 rptr = rbuf_j[imdex]; 2659 for (i=0; i<j; i++) { 2660 ncols = rptr[i+1] - rptr[i]; 2661 if (rownz_max < ncols) rownz_max = ncols; 2662 } 2663 } 2664 2665 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2666 2667 /* get local size of redundant matrix 2668 - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED, works for other types, but may not efficient! */ 2669 ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr); 2670 rstart_sub = range[nsubcomm*subrank]; 2671 if (subrank+1 < subsize) { /* not the last proc in subcomm */ 2672 rend_sub = range[nsubcomm*(subrank+1)]; 2673 } else { 2674 rend_sub = mat->rmap->N; 2675 } 2676 mloc_sub = rend_sub - rstart_sub; 2677 2678 if (M == N) { 2679 ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2680 } else { /* non-square matrix */ 2681 ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr); 2682 } 2683 ierr = MatSetBlockSizesFromMats(C,mat,mat);CHKERRQ(ierr); 2684 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2685 ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr); 2686 ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr); 2687 } else { 2688 C = *matredundant; 2689 } 2690 2691 /* insert local matrix entries */ 2692 rptr = sbuf_j; 2693 cols = sbuf_j + rend-rstart + 1; 2694 vals = sbuf_a; 2695 for (i=0; i<rend-rstart; i++) { 2696 row = i + rstart; 2697 ncols = rptr[i+1] - rptr[i]; 2698 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2699 vals += ncols; 2700 cols += ncols; 2701 } 2702 /* insert received matrix entries */ 2703 for (imdex=0; imdex<nrecvs; imdex++) { 2704 rstart = rowrange[recv_rank[imdex]]; 2705 rend = rowrange[recv_rank[imdex]+1]; 2706 /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */ 2707 rptr = rbuf_j[imdex]; 2708 cols = rbuf_j[imdex] + rend-rstart + 1; 2709 vals = rbuf_a[imdex]; 2710 for (i=0; i<rend-rstart; i++) { 2711 row = i + rstart; 2712 ncols = rptr[i+1] - rptr[i]; 2713 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2714 vals += ncols; 2715 cols += ncols; 2716 } 2717 } 2718 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2719 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2720 2721 if (reuse == MAT_INITIAL_MATRIX) { 2722 *matredundant = C; 2723 2724 /* create a 
supporting struct and attach it to C for reuse */ 2725 ierr = PetscNewLog(C,&redund);CHKERRQ(ierr); 2726 if (subsize == 1) { 2727 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2728 c->redundant = redund; 2729 } else { 2730 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2731 c->redundant = redund; 2732 } 2733 2734 redund->nzlocal = nzlocal; 2735 redund->nsends = nsends; 2736 redund->nrecvs = nrecvs; 2737 redund->send_rank = send_rank; 2738 redund->recv_rank = recv_rank; 2739 redund->sbuf_nz = sbuf_nz; 2740 redund->rbuf_nz = rbuf_nz; 2741 redund->sbuf_j = sbuf_j; 2742 redund->sbuf_a = sbuf_a; 2743 redund->rbuf_j = rbuf_j; 2744 redund->rbuf_a = rbuf_a; 2745 redund->psubcomm = NULL; 2746 2747 redund->Destroy = C->ops->destroy; 2748 C->ops->destroy = MatDestroy_MatRedundant; 2749 } 2750 PetscFunctionReturn(0); 2751 } 2752 2753 #undef __FUNCT__ 2754 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2755 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2756 { 2757 PetscErrorCode ierr; 2758 MPI_Comm comm; 2759 PetscMPIInt size,subsize; 2760 PetscInt mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N; 2761 Mat_Redundant *redund=NULL; 2762 PetscSubcomm psubcomm=NULL; 2763 MPI_Comm subcomm_in=subcomm; 2764 Mat *matseq; 2765 IS isrow,iscol; 2766 2767 PetscFunctionBegin; 2768 if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */ 2769 if (reuse == MAT_INITIAL_MATRIX) { 2770 /* create psubcomm, then get subcomm */ 2771 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2772 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2773 if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must between 1 and %D",size); 2774 2775 ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr); 2776 ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr); 2777 ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr); 2778 ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr); 2779 subcomm = psubcomm->comm; 2780 } else { /* retrieve psubcomm and subcomm */ 2781 ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr); 2782 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2783 if (subsize == 1) { 2784 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2785 redund = c->redundant; 2786 } else { 2787 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2788 redund = c->redundant; 2789 } 2790 psubcomm = redund->psubcomm; 2791 } 2792 if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) { 2793 ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr); 2794 if (reuse == MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_MatRedundant() */ 2795 ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr); 2796 if (subsize == 1) { 2797 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2798 c->redundant->psubcomm = psubcomm; 2799 } else { 2800 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2801 c->redundant->psubcomm = psubcomm ; 2802 } 2803 } 2804 PetscFunctionReturn(0); 2805 } 2806 } 2807 2808 /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */ 2809 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2810 if (reuse == MAT_INITIAL_MATRIX) { 2811 /* create a local sequential matrix matseq[0] */ 2812 mloc_sub = PETSC_DECIDE; 2813 ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr); 2814 ierr = 
           MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr);
    rstart = rend - mloc_sub;
    ierr   = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr);
    ierr   = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr);
  } else { /* reuse == MAT_REUSE_MATRIX */
    if (subsize == 1) {
      Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
      redund = c->redundant;
    } else {
      Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
      redund = c->redundant;
    }

    isrow  = redund->isrow;
    iscol  = redund->iscol;
    matseq = redund->matseq;
  }
  ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr);
  ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr);

  if (reuse == MAT_INITIAL_MATRIX) {
    /* create a supporting struct and attach it to C for reuse */
    ierr = PetscNewLog(*matredundant,&redund);CHKERRQ(ierr);
    if (subsize == 1) {
      Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
      c->redundant = redund;
    } else {
      Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
      c->redundant = redund;
    }
    redund->isrow    = isrow;
    redund->iscol    = iscol;
    redund->matseq   = matseq;
    redund->psubcomm = psubcomm;
    redund->Destroy  = (*matredundant)->ops->destroy;

    (*matredundant)->ops->destroy = MatDestroy_MatRedundant;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) { /* loop over the local rows of v */
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMin_MPIAIJ"
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*) A->data;
  PetscInt       n      = A->rmap->n;
  PetscInt       cstart = A->cmap->rstart;
  PetscInt       *cmap  = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
  /* the per-block results are purely local, so the work vectors live on PETSC_COMM_SELF */
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMax_MPIAIJ"
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*) A->data;
  PetscInt       n      = A->rmap->n;
  PetscInt       cstart = A->cmap->rstart;
  PetscInt       *cmap  = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
PetscFunctionReturn(0); 3014 } 3015 3016 #undef __FUNCT__ 3017 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 3018 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 3019 { 3020 PetscErrorCode ierr; 3021 Mat *dummy; 3022 3023 PetscFunctionBegin; 3024 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 3025 *newmat = *dummy; 3026 ierr = PetscFree(dummy);CHKERRQ(ierr); 3027 PetscFunctionReturn(0); 3028 } 3029 3030 #undef __FUNCT__ 3031 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3032 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3033 { 3034 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3035 PetscErrorCode ierr; 3036 3037 PetscFunctionBegin; 3038 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3039 PetscFunctionReturn(0); 3040 } 3041 3042 #undef __FUNCT__ 3043 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3044 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3045 { 3046 PetscErrorCode ierr; 3047 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3048 3049 PetscFunctionBegin; 3050 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3051 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3052 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3053 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3054 PetscFunctionReturn(0); 3055 } 3056 3057 /* -------------------------------------------------------------------*/ 3058 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3059 MatGetRow_MPIAIJ, 3060 MatRestoreRow_MPIAIJ, 3061 MatMult_MPIAIJ, 3062 /* 4*/ MatMultAdd_MPIAIJ, 3063 MatMultTranspose_MPIAIJ, 3064 MatMultTransposeAdd_MPIAIJ, 3065 #if defined(PETSC_HAVE_PBGL) 3066 MatSolve_MPIAIJ, 3067 #else 3068 0, 3069 #endif 3070 0, 3071 0, 3072 /*10*/ 0, 3073 0, 3074 0, 3075 MatSOR_MPIAIJ, 3076 MatTranspose_MPIAIJ, 3077 /*15*/ MatGetInfo_MPIAIJ, 3078 MatEqual_MPIAIJ, 3079 MatGetDiagonal_MPIAIJ, 3080 MatDiagonalScale_MPIAIJ, 3081 MatNorm_MPIAIJ, 3082 /*20*/ MatAssemblyBegin_MPIAIJ, 3083 MatAssemblyEnd_MPIAIJ, 3084 MatSetOption_MPIAIJ, 3085 MatZeroEntries_MPIAIJ, 3086 /*24*/ MatZeroRows_MPIAIJ, 3087 0, 3088 #if defined(PETSC_HAVE_PBGL) 3089 0, 3090 #else 3091 0, 3092 #endif 3093 0, 3094 0, 3095 /*29*/ MatSetUp_MPIAIJ, 3096 #if defined(PETSC_HAVE_PBGL) 3097 0, 3098 #else 3099 0, 3100 #endif 3101 0, 3102 0, 3103 0, 3104 /*34*/ MatDuplicate_MPIAIJ, 3105 0, 3106 0, 3107 0, 3108 0, 3109 /*39*/ MatAXPY_MPIAIJ, 3110 MatGetSubMatrices_MPIAIJ, 3111 MatIncreaseOverlap_MPIAIJ, 3112 MatGetValues_MPIAIJ, 3113 MatCopy_MPIAIJ, 3114 /*44*/ MatGetRowMax_MPIAIJ, 3115 MatScale_MPIAIJ, 3116 0, 3117 0, 3118 MatZeroRowsColumns_MPIAIJ, 3119 /*49*/ MatSetRandom_MPIAIJ, 3120 0, 3121 0, 3122 0, 3123 0, 3124 /*54*/ MatFDColoringCreate_MPIXAIJ, 3125 0, 3126 MatSetUnfactored_MPIAIJ, 3127 MatPermute_MPIAIJ, 3128 0, 3129 /*59*/ MatGetSubMatrix_MPIAIJ, 3130 MatDestroy_MPIAIJ, 3131 MatView_MPIAIJ, 3132 0, 3133 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 3134 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 3135 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 3136 0, 3137 0, 3138 0, 3139 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3140 MatGetRowMinAbs_MPIAIJ, 3141 0, 3142 MatSetColoring_MPIAIJ, 3143 0, 3144 MatSetValuesAdifor_MPIAIJ, 3145 /*75*/ MatFDColoringApply_AIJ, 3146 0, 3147 0, 3148 0, 3149 MatFindZeroDiagonals_MPIAIJ, 3150 /*80*/ 0, 3151 0, 3152 0, 3153 /*83*/ MatLoad_MPIAIJ, 3154 0, 3155 0, 3156 0, 3157 0, 3158 0, 3159 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3160 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3161 
MatMatMultNumeric_MPIAIJ_MPIAIJ, 3162 MatPtAP_MPIAIJ_MPIAIJ, 3163 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3164 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 3165 0, 3166 0, 3167 0, 3168 0, 3169 /*99*/ 0, 3170 0, 3171 0, 3172 MatConjugate_MPIAIJ, 3173 0, 3174 /*104*/MatSetValuesRow_MPIAIJ, 3175 MatRealPart_MPIAIJ, 3176 MatImaginaryPart_MPIAIJ, 3177 0, 3178 0, 3179 /*109*/0, 3180 MatGetRedundantMatrix_MPIAIJ, 3181 MatGetRowMin_MPIAIJ, 3182 0, 3183 0, 3184 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3185 0, 3186 0, 3187 0, 3188 0, 3189 /*119*/0, 3190 0, 3191 0, 3192 0, 3193 MatGetMultiProcBlock_MPIAIJ, 3194 /*124*/MatFindNonzeroRows_MPIAIJ, 3195 MatGetColumnNorms_MPIAIJ, 3196 MatInvertBlockDiagonal_MPIAIJ, 3197 0, 3198 MatGetSubMatricesParallel_MPIAIJ, 3199 /*129*/0, 3200 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3201 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3202 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3203 0, 3204 /*134*/0, 3205 0, 3206 0, 3207 0, 3208 0, 3209 /*139*/0, 3210 0, 3211 0, 3212 MatFDColoringSetUp_MPIXAIJ 3213 }; 3214 3215 /* ----------------------------------------------------------------------------------------*/ 3216 3217 #undef __FUNCT__ 3218 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3219 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3220 { 3221 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3222 PetscErrorCode ierr; 3223 3224 PetscFunctionBegin; 3225 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3226 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3227 PetscFunctionReturn(0); 3228 } 3229 3230 #undef __FUNCT__ 3231 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3232 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3233 { 3234 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3235 PetscErrorCode ierr; 3236 3237 PetscFunctionBegin; 3238 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3239 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3240 PetscFunctionReturn(0); 3241 } 3242 3243 #undef __FUNCT__ 3244 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3245 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3246 { 3247 Mat_MPIAIJ *b; 3248 PetscErrorCode ierr; 3249 3250 PetscFunctionBegin; 3251 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3252 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3253 b = (Mat_MPIAIJ*)B->data; 3254 3255 if (!B->preallocated) { 3256 /* Explicitly create 2 MATSEQAIJ matrices. 
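       b->A is the diagonal block, sized B->rmap->n by B->cmap->n; b->B is the off-diagonal
       block and is created below with the full global column count B->cmap->N (its columns
       are later compacted into the garray mapping when the matrix is assembled).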
*/ 3257 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3258 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3259 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 3260 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3261 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 3262 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3263 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3264 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 3265 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3266 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 3267 } 3268 3269 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3270 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3271 B->preallocated = PETSC_TRUE; 3272 PetscFunctionReturn(0); 3273 } 3274 3275 #undef __FUNCT__ 3276 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3277 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3278 { 3279 Mat mat; 3280 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3281 PetscErrorCode ierr; 3282 3283 PetscFunctionBegin; 3284 *newmat = 0; 3285 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3286 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3287 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 3288 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3289 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3290 a = (Mat_MPIAIJ*)mat->data; 3291 3292 mat->factortype = matin->factortype; 3293 mat->assembled = PETSC_TRUE; 3294 mat->insertmode = NOT_SET_VALUES; 3295 mat->preallocated = PETSC_TRUE; 3296 3297 a->size = oldmat->size; 3298 a->rank = oldmat->rank; 3299 a->donotstash = oldmat->donotstash; 3300 a->roworiented = oldmat->roworiented; 3301 a->rowindices = 0; 3302 a->rowvalues = 0; 3303 a->getrowactive = PETSC_FALSE; 3304 3305 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3306 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3307 3308 if (oldmat->colmap) { 3309 #if defined(PETSC_USE_CTABLE) 3310 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3311 #else 3312 ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr); 3313 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3314 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3315 #endif 3316 } else a->colmap = 0; 3317 if (oldmat->garray) { 3318 PetscInt len; 3319 len = oldmat->B->cmap->n; 3320 ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr); 3321 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3322 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3323 } else a->garray = 0; 3324 3325 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3326 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 3327 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3328 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 3329 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3330 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3331 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3332 ierr = 
PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3333 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3334 *newmat = mat; 3335 PetscFunctionReturn(0); 3336 } 3337 3338 3339 3340 #undef __FUNCT__ 3341 #define __FUNCT__ "MatLoad_MPIAIJ" 3342 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3343 { 3344 PetscScalar *vals,*svals; 3345 MPI_Comm comm; 3346 PetscErrorCode ierr; 3347 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3348 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3349 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3350 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 3351 PetscInt cend,cstart,n,*rowners,sizesset=1; 3352 int fd; 3353 PetscInt bs = 1; 3354 3355 PetscFunctionBegin; 3356 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3357 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3358 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3359 if (!rank) { 3360 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3361 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3362 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3363 } 3364 3365 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3366 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3367 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3368 3369 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3370 3371 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3372 M = header[1]; N = header[2]; 3373 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3374 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3375 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3376 3377 /* If global sizes are set, check if they are consistent with that given in the file */ 3378 if (sizesset) { 3379 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3380 } 3381 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3382 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3383 3384 /* determine ownership of all (block) rows */ 3385 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3386 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3387 else m = newMat->rmap->n; /* Set by user */ 3388 3389 ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr); 3390 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3391 3392 /* First process needs enough room for process with most rows */ 3393 if (!rank) { 3394 mmax = rowners[1]; 3395 for (i=2; i<=size; i++) { 3396 mmax = PetscMax(mmax, rowners[i]); 3397 } 3398 } else mmax = -1; /* unused, but compilers complain */ 3399 3400 rowners[0] = 0; 3401 for (i=2; i<=size; i++) { 3402 rowners[i] += rowners[i-1]; 3403 } 3404 rstart = rowners[rank]; 3405 rend = rowners[rank+1]; 3406 3407 /* distribute row lengths to all processors */ 3408 ierr = 
PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 3409 if (!rank) { 3410 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3411 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 3412 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 3413 for (j=0; j<m; j++) { 3414 procsnz[0] += ourlens[j]; 3415 } 3416 for (i=1; i<size; i++) { 3417 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3418 /* calculate the number of nonzeros on each processor */ 3419 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3420 procsnz[i] += rowlengths[j]; 3421 } 3422 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3423 } 3424 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3425 } else { 3426 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3427 } 3428 3429 if (!rank) { 3430 /* determine max buffer needed and allocate it */ 3431 maxnz = 0; 3432 for (i=0; i<size; i++) { 3433 maxnz = PetscMax(maxnz,procsnz[i]); 3434 } 3435 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 3436 3437 /* read in my part of the matrix column indices */ 3438 nz = procsnz[0]; 3439 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3440 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3441 3442 /* read in every one elses and ship off */ 3443 for (i=1; i<size; i++) { 3444 nz = procsnz[i]; 3445 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3446 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3447 } 3448 ierr = PetscFree(cols);CHKERRQ(ierr); 3449 } else { 3450 /* determine buffer space needed for message */ 3451 nz = 0; 3452 for (i=0; i<m; i++) { 3453 nz += ourlens[i]; 3454 } 3455 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3456 3457 /* receive message of column indices*/ 3458 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3459 } 3460 3461 /* determine column ownership if matrix is not square */ 3462 if (N != M) { 3463 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3464 else n = newMat->cmap->n; 3465 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3466 cstart = cend - n; 3467 } else { 3468 cstart = rstart; 3469 cend = rend; 3470 n = cend - cstart; 3471 } 3472 3473 /* loop over local rows, determining number of off diagonal entries */ 3474 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3475 jj = 0; 3476 for (i=0; i<m; i++) { 3477 for (j=0; j<ourlens[i]; j++) { 3478 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3479 jj++; 3480 } 3481 } 3482 3483 for (i=0; i<m; i++) { 3484 ourlens[i] -= offlens[i]; 3485 } 3486 if (!sizesset) { 3487 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3488 } 3489 3490 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3491 3492 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3493 3494 for (i=0; i<m; i++) { 3495 ourlens[i] += offlens[i]; 3496 } 3497 3498 if (!rank) { 3499 ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr); 3500 3501 /* read in my part of the matrix numerical values */ 3502 nz = procsnz[0]; 3503 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3504 3505 /* insert into matrix */ 3506 jj = rstart; 3507 smycols = mycols; 3508 svals = vals; 3509 for (i=0; i<m; i++) { 3510 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3511 smycols += ourlens[i]; 3512 svals += ourlens[i]; 3513 jj++; 3514 } 3515 3516 /* read in other processors and ship out */ 3517 for (i=1; i<size; i++) { 3518 nz = 
procsnz[i]; 3519 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3520 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3521 } 3522 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3523 } else { 3524 /* receive numeric values */ 3525 ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr); 3526 3527 /* receive message of values*/ 3528 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3529 3530 /* insert into matrix */ 3531 jj = rstart; 3532 smycols = mycols; 3533 svals = vals; 3534 for (i=0; i<m; i++) { 3535 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3536 smycols += ourlens[i]; 3537 svals += ourlens[i]; 3538 jj++; 3539 } 3540 } 3541 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3542 ierr = PetscFree(vals);CHKERRQ(ierr); 3543 ierr = PetscFree(mycols);CHKERRQ(ierr); 3544 ierr = PetscFree(rowners);CHKERRQ(ierr); 3545 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3546 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3547 PetscFunctionReturn(0); 3548 } 3549 3550 #undef __FUNCT__ 3551 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3552 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3553 { 3554 PetscErrorCode ierr; 3555 IS iscol_local; 3556 PetscInt csize; 3557 3558 PetscFunctionBegin; 3559 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3560 if (call == MAT_REUSE_MATRIX) { 3561 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3562 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3563 } else { 3564 PetscInt cbs; 3565 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3566 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3567 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3568 } 3569 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3570 if (call == MAT_INITIAL_MATRIX) { 3571 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3572 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3573 } 3574 PetscFunctionReturn(0); 3575 } 3576 3577 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3578 #undef __FUNCT__ 3579 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3580 /* 3581 Not great since it makes two copies of the submatrix, first an SeqAIJ 3582 in local and then by concatenating the local matrices the end result. 3583 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3584 3585 Note: This requires a sequential iscol with all indices. 
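    The caller, MatGetSubMatrix_MPIAIJ() above, builds such an index set with ISAllGather()
    before invoking this routine.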
3586 */ 3587 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3588 { 3589 PetscErrorCode ierr; 3590 PetscMPIInt rank,size; 3591 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3592 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3593 PetscBool allcolumns, colflag; 3594 Mat M,Mreuse; 3595 MatScalar *vwork,*aa; 3596 MPI_Comm comm; 3597 Mat_SeqAIJ *aij; 3598 3599 PetscFunctionBegin; 3600 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3601 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3602 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3603 3604 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3605 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3606 if (colflag && ncol == mat->cmap->N) { 3607 allcolumns = PETSC_TRUE; 3608 } else { 3609 allcolumns = PETSC_FALSE; 3610 } 3611 if (call == MAT_REUSE_MATRIX) { 3612 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3613 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3614 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3615 } else { 3616 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3617 } 3618 3619 /* 3620 m - number of local rows 3621 n - number of columns (same on all processors) 3622 rstart - first row in new global matrix generated 3623 */ 3624 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3625 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3626 if (call == MAT_INITIAL_MATRIX) { 3627 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3628 ii = aij->i; 3629 jj = aij->j; 3630 3631 /* 3632 Determine the number of non-zeros in the diagonal and off-diagonal 3633 portions of the matrix in order to do correct preallocation 3634 */ 3635 3636 /* first get start and end of "diagonal" columns */ 3637 if (csize == PETSC_DECIDE) { 3638 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3639 if (mglobal == n) { /* square matrix */ 3640 nlocal = m; 3641 } else { 3642 nlocal = n/size + ((n % size) > rank); 3643 } 3644 } else { 3645 nlocal = csize; 3646 } 3647 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3648 rstart = rend - nlocal; 3649 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3650 3651 /* next, compute all the lengths */ 3652 ierr = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr); 3653 olens = dlens + m; 3654 for (i=0; i<m; i++) { 3655 jend = ii[i+1] - ii[i]; 3656 olen = 0; 3657 dlen = 0; 3658 for (j=0; j<jend; j++) { 3659 if (*jj < rstart || *jj >= rend) olen++; 3660 else dlen++; 3661 jj++; 3662 } 3663 olens[i] = olen; 3664 dlens[i] = dlen; 3665 } 3666 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3667 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3668 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3669 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3670 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3671 ierr = PetscFree(dlens);CHKERRQ(ierr); 3672 } else { 3673 PetscInt ml,nl; 3674 3675 M = *newmat; 3676 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3677 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3678 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3679 /* 3680 The next 
two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3681 rather than the slower MatSetValues(). 3682 */ 3683 M->was_assembled = PETSC_TRUE; 3684 M->assembled = PETSC_FALSE; 3685 } 3686 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3687 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3688 ii = aij->i; 3689 jj = aij->j; 3690 aa = aij->a; 3691 for (i=0; i<m; i++) { 3692 row = rstart + i; 3693 nz = ii[i+1] - ii[i]; 3694 cwork = jj; jj += nz; 3695 vwork = aa; aa += nz; 3696 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3697 } 3698 3699 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3700 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3701 *newmat = M; 3702 3703 /* save submatrix used in processor for next request */ 3704 if (call == MAT_INITIAL_MATRIX) { 3705 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3706 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3707 } 3708 PetscFunctionReturn(0); 3709 } 3710 3711 #undef __FUNCT__ 3712 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3713 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3714 { 3715 PetscInt m,cstart, cend,j,nnz,i,d; 3716 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3717 const PetscInt *JJ; 3718 PetscScalar *values; 3719 PetscErrorCode ierr; 3720 3721 PetscFunctionBegin; 3722 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3723 3724 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3725 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3726 m = B->rmap->n; 3727 cstart = B->cmap->rstart; 3728 cend = B->cmap->rend; 3729 rstart = B->rmap->rstart; 3730 3731 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3732 3733 #if defined(PETSC_USE_DEBUG) 3734 for (i=0; i<m; i++) { 3735 nnz = Ii[i+1]- Ii[i]; 3736 JJ = J + Ii[i]; 3737 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3738 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]); 3739 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3740 } 3741 #endif 3742 3743 for (i=0; i<m; i++) { 3744 nnz = Ii[i+1]- Ii[i]; 3745 JJ = J + Ii[i]; 3746 nnz_max = PetscMax(nnz_max,nnz); 3747 d = 0; 3748 for (j=0; j<nnz; j++) { 3749 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3750 } 3751 d_nnz[i] = d; 3752 o_nnz[i] = nnz - d; 3753 } 3754 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3755 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3756 3757 if (v) values = (PetscScalar*)v; 3758 else { 3759 ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr); 3760 } 3761 3762 for (i=0; i<m; i++) { 3763 ii = i + rstart; 3764 nnz = Ii[i+1]- Ii[i]; 3765 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3766 } 3767 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3768 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3769 3770 if (!v) { 3771 ierr = PetscFree(values);CHKERRQ(ierr); 3772 } 3773 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3774 PetscFunctionReturn(0); 3775 } 3776 3777 #undef __FUNCT__ 3778 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3779 /*@ 3780 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3781 (the default parallel PETSc format). 3782 3783 Collective on MPI_Comm 3784 3785 Input Parameters: 3786 + B - the matrix 3787 . i - the indices into j for the start of each local row (starts with zero) 3788 . j - the column indices for each local row (starts with zero) 3789 - v - optional values in the matrix 3790 3791 Level: developer 3792 3793 Notes: 3794 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3795 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3796 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3797 3798 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3799 3800 The format which is used for the sparse matrix input, is equivalent to a 3801 row-major ordering.. i.e for the following matrix, the input data expected is 3802 as shown: 3803 3804 1 0 0 3805 2 0 3 P0 3806 ------- 3807 4 5 6 P1 3808 3809 Process0 [P0]: rows_owned=[0,1] 3810 i = {0,1,3} [size = nrow+1 = 2+1] 3811 j = {0,0,2} [size = nz = 6] 3812 v = {1,2,3} [size = nz = 6] 3813 3814 Process1 [P1]: rows_owned=[2] 3815 i = {0,3} [size = nrow+1 = 1+1] 3816 j = {0,1,2} [size = nz = 6] 3817 v = {4,5,6} [size = nz = 6] 3818 3819 .keywords: matrix, aij, compressed row, sparse, parallel 3820 3821 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3822 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3823 @*/ 3824 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3825 { 3826 PetscErrorCode ierr; 3827 3828 PetscFunctionBegin; 3829 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3830 PetscFunctionReturn(0); 3831 } 3832 3833 #undef __FUNCT__ 3834 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3835 /*@C 3836 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3837 (the default parallel PETSc format). For good matrix assembly performance 3838 the user should preallocate the matrix storage by setting the parameters 3839 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3840 performance can be increased by more than a factor of 50. 3841 3842 Collective on MPI_Comm 3843 3844 Input Parameters: 3845 + A - the matrix 3846 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3847 (same value is used for all local rows) 3848 . d_nnz - array containing the number of nonzeros in the various rows of the 3849 DIAGONAL portion of the local submatrix (possibly different for each row) 3850 or NULL, if d_nz is used to specify the nonzero structure. 3851 The size of this array is equal to the number of local rows, i.e 'm'. 
3852 For matrices that will be factored, you must leave room for (and set) 3853 the diagonal entry even if it is zero. 3854 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3855 submatrix (same value is used for all local rows). 3856 - o_nnz - array containing the number of nonzeros in the various rows of the 3857 OFF-DIAGONAL portion of the local submatrix (possibly different for 3858 each row) or NULL, if o_nz is used to specify the nonzero 3859 structure. The size of this array is equal to the number 3860 of local rows, i.e 'm'. 3861 3862 If the *_nnz parameter is given then the *_nz parameter is ignored 3863 3864 The AIJ format (also called the Yale sparse matrix format or 3865 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3866 storage. The stored row and column indices begin with zero. 3867 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3868 3869 The parallel matrix is partitioned such that the first m0 rows belong to 3870 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3871 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3872 3873 The DIAGONAL portion of the local submatrix of a processor can be defined 3874 as the submatrix which is obtained by extraction the part corresponding to 3875 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3876 first row that belongs to the processor, r2 is the last row belonging to 3877 the this processor, and c1-c2 is range of indices of the local part of a 3878 vector suitable for applying the matrix to. This is an mxn matrix. In the 3879 common case of a square matrix, the row and column ranges are the same and 3880 the DIAGONAL part is also square. The remaining portion of the local 3881 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3882 3883 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3884 3885 You can call MatGetInfo() to get information on how effective the preallocation was; 3886 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3887 You can also run with the option -info and look for messages with the string 3888 malloc in them to see if additional memory allocation was needed. 3889 3890 Example usage: 3891 3892 Consider the following 8x8 matrix with 34 non-zero values, that is 3893 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3894 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3895 as follows: 3896 3897 .vb 3898 1 2 0 | 0 3 0 | 0 4 3899 Proc0 0 5 6 | 7 0 0 | 8 0 3900 9 0 10 | 11 0 0 | 12 0 3901 ------------------------------------- 3902 13 0 14 | 15 16 17 | 0 0 3903 Proc1 0 18 0 | 19 20 21 | 0 0 3904 0 0 0 | 22 23 0 | 24 0 3905 ------------------------------------- 3906 Proc2 25 26 27 | 0 0 28 | 29 0 3907 30 0 0 | 31 32 33 | 0 34 3908 .ve 3909 3910 This can be represented as a collection of submatrices as: 3911 3912 .vb 3913 A B C 3914 D E F 3915 G H I 3916 .ve 3917 3918 Where the submatrices A,B,C are owned by proc0, D,E,F are 3919 owned by proc1, G,H,I are owned by proc2. 3920 3921 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3922 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3923 The 'M','N' parameters are 8,8, and have the same values on all procs. 3924 3925 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3926 submatrices [A], [E], [I] respectively. 
The OFF-DIAGONAL submatrices 3927 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3928 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3929 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3930 matrix, ans [DF] as another SeqAIJ matrix. 3931 3932 When d_nz, o_nz parameters are specified, d_nz storage elements are 3933 allocated for every row of the local diagonal submatrix, and o_nz 3934 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3935 One way to choose d_nz and o_nz is to use the max nonzerors per local 3936 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3937 In this case, the values of d_nz,o_nz are: 3938 .vb 3939 proc0 : dnz = 2, o_nz = 2 3940 proc1 : dnz = 3, o_nz = 2 3941 proc2 : dnz = 1, o_nz = 4 3942 .ve 3943 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3944 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3945 for proc3. i.e we are using 12+15+10=37 storage locations to store 3946 34 values. 3947 3948 When d_nnz, o_nnz parameters are specified, the storage is specified 3949 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3950 In the above case the values for d_nnz,o_nnz are: 3951 .vb 3952 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3953 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3954 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3955 .ve 3956 Here the space allocated is sum of all the above values i.e 34, and 3957 hence pre-allocation is perfect. 3958 3959 Level: intermediate 3960 3961 .keywords: matrix, aij, compressed row, sparse, parallel 3962 3963 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 3964 MPIAIJ, MatGetInfo(), PetscSplitOwnership() 3965 @*/ 3966 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3967 { 3968 PetscErrorCode ierr; 3969 3970 PetscFunctionBegin; 3971 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3972 PetscValidType(B,1); 3973 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3974 PetscFunctionReturn(0); 3975 } 3976 3977 #undef __FUNCT__ 3978 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3979 /*@ 3980 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3981 CSR format the local rows. 3982 3983 Collective on MPI_Comm 3984 3985 Input Parameters: 3986 + comm - MPI communicator 3987 . m - number of local rows (Cannot be PETSC_DECIDE) 3988 . n - This value should be the same as the local size used in creating the 3989 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3990 calculated if N is given) For square matrices n is almost always m. 3991 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3992 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3993 . i - row indices 3994 . j - column indices 3995 - a - matrix values 3996 3997 Output Parameter: 3998 . mat - the matrix 3999 4000 Level: intermediate 4001 4002 Notes: 4003 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4004 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4005 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 
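   A minimal calling sketch (each process passes its own local CSR arrays; the variable
   names here are illustrative only, see the per-process example below for their contents):
.vb
   Mat A;
   MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,nlocalrows,PETSC_DECIDE,PETSC_DETERMINE,N,i,j,v,&A);
.ve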
4006 4007 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4008 4009 The format which is used for the sparse matrix input, is equivalent to a 4010 row-major ordering.. i.e for the following matrix, the input data expected is 4011 as shown: 4012 4013 1 0 0 4014 2 0 3 P0 4015 ------- 4016 4 5 6 P1 4017 4018 Process0 [P0]: rows_owned=[0,1] 4019 i = {0,1,3} [size = nrow+1 = 2+1] 4020 j = {0,0,2} [size = nz = 6] 4021 v = {1,2,3} [size = nz = 6] 4022 4023 Process1 [P1]: rows_owned=[2] 4024 i = {0,3} [size = nrow+1 = 1+1] 4025 j = {0,1,2} [size = nz = 6] 4026 v = {4,5,6} [size = nz = 6] 4027 4028 .keywords: matrix, aij, compressed row, sparse, parallel 4029 4030 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4031 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4032 @*/ 4033 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4034 { 4035 PetscErrorCode ierr; 4036 4037 PetscFunctionBegin; 4038 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4039 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4040 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4041 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4042 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4043 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4044 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4045 PetscFunctionReturn(0); 4046 } 4047 4048 #undef __FUNCT__ 4049 #define __FUNCT__ "MatCreateAIJ" 4050 /*@C 4051 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4052 (the default parallel PETSc format). For good matrix assembly performance 4053 the user should preallocate the matrix storage by setting the parameters 4054 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4055 performance can be increased by more than a factor of 50. 4056 4057 Collective on MPI_Comm 4058 4059 Input Parameters: 4060 + comm - MPI communicator 4061 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4062 This value should be the same as the local size used in creating the 4063 y vector for the matrix-vector product y = Ax. 4064 . n - This value should be the same as the local size used in creating the 4065 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4066 calculated if N is given) For square matrices n is almost always m. 4067 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4068 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4069 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4070 (same value is used for all local rows) 4071 . d_nnz - array containing the number of nonzeros in the various rows of the 4072 DIAGONAL portion of the local submatrix (possibly different for each row) 4073 or NULL, if d_nz is used to specify the nonzero structure. 4074 The size of this array is equal to the number of local rows, i.e 'm'. 4075 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4076 submatrix (same value is used for all local rows). 
4077 - o_nnz - array containing the number of nonzeros in the various rows of the 4078 OFF-DIAGONAL portion of the local submatrix (possibly different for 4079 each row) or NULL, if o_nz is used to specify the nonzero 4080 structure. The size of this array is equal to the number 4081 of local rows, i.e 'm'. 4082 4083 Output Parameter: 4084 . A - the matrix 4085 4086 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4087 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4088 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4089 4090 Notes: 4091 If the *_nnz parameter is given then the *_nz parameter is ignored 4092 4093 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4094 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4095 storage requirements for this matrix. 4096 4097 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4098 processor than it must be used on all processors that share the object for 4099 that argument. 4100 4101 The user MUST specify either the local or global matrix dimensions 4102 (possibly both). 4103 4104 The parallel matrix is partitioned across processors such that the 4105 first m0 rows belong to process 0, the next m1 rows belong to 4106 process 1, the next m2 rows belong to process 2 etc.. where 4107 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4108 values corresponding to [m x N] submatrix. 4109 4110 The columns are logically partitioned with the n0 columns belonging 4111 to 0th partition, the next n1 columns belonging to the next 4112 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 4113 4114 The DIAGONAL portion of the local submatrix on any given processor 4115 is the submatrix corresponding to the rows and columns m,n 4116 corresponding to the given processor. i.e diagonal matrix on 4117 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4118 etc. The remaining portion of the local submatrix [m x (N-n)] 4119 constitute the OFF-DIAGONAL portion. The example below better 4120 illustrates this concept. 4121 4122 For a square global matrix we define each processor's diagonal portion 4123 to be its local rows and the corresponding columns (a square submatrix); 4124 each processor's off-diagonal portion encompasses the remainder of the 4125 local matrix (a rectangular submatrix). 4126 4127 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4128 4129 When calling this routine with a single process communicator, a matrix of 4130 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4131 type of communicator, use the construction mechanism: 4132 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4133 4134 By default, this format uses inodes (identical nodes) when possible. 4135 We search for consecutive rows with the same nonzero structure, thereby 4136 reusing matrix information to achieve increased efficiency. 4137 4138 Options Database Keys: 4139 + -mat_no_inode - Do not use inodes 4140 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4141 - -mat_aij_oneindex - Internally use indexing starting at 1 4142 rather than 0. Note that when calling MatSetValues(), 4143 the user still MUST index entries starting at 0! 4144 4145 4146 Example usage: 4147 4148 Consider the following 8x8 matrix with 34 non-zero values, that is 4149 assembled across 3 processors. 
Lets assume that proc0 owns 3 rows, 4150 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4151 as follows: 4152 4153 .vb 4154 1 2 0 | 0 3 0 | 0 4 4155 Proc0 0 5 6 | 7 0 0 | 8 0 4156 9 0 10 | 11 0 0 | 12 0 4157 ------------------------------------- 4158 13 0 14 | 15 16 17 | 0 0 4159 Proc1 0 18 0 | 19 20 21 | 0 0 4160 0 0 0 | 22 23 0 | 24 0 4161 ------------------------------------- 4162 Proc2 25 26 27 | 0 0 28 | 29 0 4163 30 0 0 | 31 32 33 | 0 34 4164 .ve 4165 4166 This can be represented as a collection of submatrices as: 4167 4168 .vb 4169 A B C 4170 D E F 4171 G H I 4172 .ve 4173 4174 Where the submatrices A,B,C are owned by proc0, D,E,F are 4175 owned by proc1, G,H,I are owned by proc2. 4176 4177 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4178 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4179 The 'M','N' parameters are 8,8, and have the same values on all procs. 4180 4181 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4182 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4183 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4184 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4185 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4186 matrix, ans [DF] as another SeqAIJ matrix. 4187 4188 When d_nz, o_nz parameters are specified, d_nz storage elements are 4189 allocated for every row of the local diagonal submatrix, and o_nz 4190 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4191 One way to choose d_nz and o_nz is to use the max nonzerors per local 4192 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4193 In this case, the values of d_nz,o_nz are: 4194 .vb 4195 proc0 : dnz = 2, o_nz = 2 4196 proc1 : dnz = 3, o_nz = 2 4197 proc2 : dnz = 1, o_nz = 4 4198 .ve 4199 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4200 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4201 for proc3. i.e we are using 12+15+10=37 storage locations to store 4202 34 values. 4203 4204 When d_nnz, o_nnz parameters are specified, the storage is specified 4205 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4206 In the above case the values for d_nnz,o_nnz are: 4207 .vb 4208 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4209 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4210 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4211 .ve 4212 Here the space allocated is sum of all the above values i.e 34, and 4213 hence pre-allocation is perfect. 
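   A minimal usage sketch for the example above (run on 3 processes; m, n and the
   d_nnz/o_nnz arrays hold the per-process values listed above, the variable names are
   illustrative only, and d_nz/o_nz are passed as 0 because the arrays are supplied):
.vb
   Mat A;
   MatCreateAIJ(PETSC_COMM_WORLD,m,n,8,8,0,d_nnz,0,o_nnz,&A);
   ... MatSetValues(A,...) for the locally owned rows ...
   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve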
4214 4215 Level: intermediate 4216 4217 .keywords: matrix, aij, compressed row, sparse, parallel 4218 4219 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4220 MPIAIJ, MatCreateMPIAIJWithArrays() 4221 @*/ 4222 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4223 { 4224 PetscErrorCode ierr; 4225 PetscMPIInt size; 4226 4227 PetscFunctionBegin; 4228 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4229 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4230 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4231 if (size > 1) { 4232 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4233 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4234 } else { 4235 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4236 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4237 } 4238 PetscFunctionReturn(0); 4239 } 4240 4241 #undef __FUNCT__ 4242 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4243 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4244 { 4245 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4246 4247 PetscFunctionBegin; 4248 *Ad = a->A; 4249 *Ao = a->B; 4250 *colmap = a->garray; 4251 PetscFunctionReturn(0); 4252 } 4253 4254 #undef __FUNCT__ 4255 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4256 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4257 { 4258 PetscErrorCode ierr; 4259 PetscInt i; 4260 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4261 4262 PetscFunctionBegin; 4263 if (coloring->ctype == IS_COLORING_GLOBAL) { 4264 ISColoringValue *allcolors,*colors; 4265 ISColoring ocoloring; 4266 4267 /* set coloring for diagonal portion */ 4268 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4269 4270 /* set coloring for off-diagonal portion */ 4271 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 4272 ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr); 4273 for (i=0; i<a->B->cmap->n; i++) { 4274 colors[i] = allcolors[a->garray[i]]; 4275 } 4276 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4277 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4278 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4279 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4280 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4281 ISColoringValue *colors; 4282 PetscInt *larray; 4283 ISColoring ocoloring; 4284 4285 /* set coloring for diagonal portion */ 4286 ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr); 4287 for (i=0; i<a->A->cmap->n; i++) { 4288 larray[i] = i + A->cmap->rstart; 4289 } 4290 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 4291 ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr); 4292 for (i=0; i<a->A->cmap->n; i++) { 4293 colors[i] = coloring->colors[larray[i]]; 4294 } 4295 ierr = PetscFree(larray);CHKERRQ(ierr); 4296 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4297 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4298 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4299 4300 /* set coloring for off-diagonal portion */ 4301 ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr); 4302 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 4303 ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr); 4304 for (i=0; i<a->B->cmap->n; i++) { 4305 colors[i] = coloring->colors[larray[i]]; 4306 } 4307 ierr = PetscFree(larray);CHKERRQ(ierr); 4308 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4309 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4310 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4311 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4312 PetscFunctionReturn(0); 4313 } 4314 4315 #undef __FUNCT__ 4316 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4317 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4318 { 4319 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4320 PetscErrorCode ierr; 4321 4322 PetscFunctionBegin; 4323 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4324 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4325 PetscFunctionReturn(0); 4326 } 4327 4328 #undef __FUNCT__ 4329 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4330 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4331 { 4332 PetscErrorCode ierr; 4333 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4334 PetscInt *indx; 4335 4336 PetscFunctionBegin; 4337 /* This routine will ONLY return MPIAIJ type matrix */ 4338 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4339 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4340 if (n == PETSC_DECIDE) { 4341 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4342 } 4343 /* Check sum(n) = N */ 4344 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4345 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4346 4347 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4348 rstart -= m; 4349 4350 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4351 for (i=0; i<m; i++) { 4352 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4353 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4354 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4355 } 4356 4357 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4358 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4359 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4360 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4361 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4362 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4363 PetscFunctionReturn(0); 4364 } 4365 4366 #undef __FUNCT__ 4367 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4368 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4369 { 4370 PetscErrorCode ierr; 4371 PetscInt m,N,i,rstart,nnz,Ii; 4372 PetscInt *indx; 4373 PetscScalar *values; 4374 4375 PetscFunctionBegin; 4376 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4377 ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr); 4378 for (i=0; i<m; i++) { 4379 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4380 Ii = i + rstart; 4381 ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4382 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4383 } 4384 ierr = 
MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4385 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4386 PetscFunctionReturn(0); 4387 } 4388 4389 #undef __FUNCT__ 4390 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4391 /*@ 4392 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4393 matrices from each processor 4394 4395 Collective on MPI_Comm 4396 4397 Input Parameters: 4398 + comm - the communicators the parallel matrix will live on 4399 . inmat - the input sequential matrices 4400 . n - number of local columns (or PETSC_DECIDE) 4401 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4402 4403 Output Parameter: 4404 . outmat - the parallel matrix generated 4405 4406 Level: advanced 4407 4408 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4409 4410 @*/ 4411 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4412 { 4413 PetscErrorCode ierr; 4414 PetscMPIInt size; 4415 4416 PetscFunctionBegin; 4417 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4418 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4419 if (size == 1) { 4420 if (scall == MAT_INITIAL_MATRIX) { 4421 ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr); 4422 } else { 4423 ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4424 } 4425 } else { 4426 if (scall == MAT_INITIAL_MATRIX) { 4427 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4428 } 4429 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4430 } 4431 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4432 PetscFunctionReturn(0); 4433 } 4434 4435 #undef __FUNCT__ 4436 #define __FUNCT__ "MatFileSplit" 4437 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4438 { 4439 PetscErrorCode ierr; 4440 PetscMPIInt rank; 4441 PetscInt m,N,i,rstart,nnz; 4442 size_t len; 4443 const PetscInt *indx; 4444 PetscViewer out; 4445 char *name; 4446 Mat B; 4447 const PetscScalar *values; 4448 4449 PetscFunctionBegin; 4450 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4451 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4452 /* Should this be the type of the diagonal block of A? 
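      For now each process always builds its piece as MATSEQAIJ (see MatSetType(B,MATSEQAIJ)
      below), independent of the type of A's diagonal block.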
*/ 4453 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4454 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4455 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4456 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4457 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4458 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4459 for (i=0; i<m; i++) { 4460 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4461 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4462 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4463 } 4464 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4465 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4466 4467 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4468 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4469 ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr); 4470 sprintf(name,"%s.%d",outfile,rank); 4471 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4472 ierr = PetscFree(name);CHKERRQ(ierr); 4473 ierr = MatView(B,out);CHKERRQ(ierr); 4474 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4475 ierr = MatDestroy(&B);CHKERRQ(ierr); 4476 PetscFunctionReturn(0); 4477 } 4478 4479 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4480 #undef __FUNCT__ 4481 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4482 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4483 { 4484 PetscErrorCode ierr; 4485 Mat_Merge_SeqsToMPI *merge; 4486 PetscContainer container; 4487 4488 PetscFunctionBegin; 4489 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4490 if (container) { 4491 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4492 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4493 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4494 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4495 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4496 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4497 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4498 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4499 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4500 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4501 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4502 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4503 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4504 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4505 ierr = PetscFree(merge);CHKERRQ(ierr); 4506 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4507 } 4508 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4509 PetscFunctionReturn(0); 4510 } 4511 4512 #include <../src/mat/utils/freespace.h> 4513 #include <petscbt.h> 4514 4515 #undef __FUNCT__ 4516 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4517 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4518 { 4519 PetscErrorCode ierr; 4520 MPI_Comm comm; 4521 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4522 PetscMPIInt size,rank,taga,*len_s; 4523 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4524 PetscInt proc,m; 4525 PetscInt **buf_ri,**buf_rj; 4526 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4527 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4528 MPI_Request *s_waits,*r_waits; 4529 MPI_Status *status; 4530 MatScalar *aa=a->a; 4531 MatScalar **abuf_r,*ba_i; 4532 Mat_Merge_SeqsToMPI *merge; 4533 PetscContainer container; 4534 4535 PetscFunctionBegin; 4536 ierr = 
PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4537 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4538 4539 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4540 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4541 4542 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4543 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4544 4545 bi = merge->bi; 4546 bj = merge->bj; 4547 buf_ri = merge->buf_ri; 4548 buf_rj = merge->buf_rj; 4549 4550 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4551 owners = merge->rowmap->range; 4552 len_s = merge->len_s; 4553 4554 /* send and recv matrix values */ 4555 /*-----------------------------*/ 4556 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4557 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4558 4559 ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr); 4560 for (proc=0,k=0; proc<size; proc++) { 4561 if (!len_s[proc]) continue; 4562 i = owners[proc]; 4563 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4564 k++; 4565 } 4566 4567 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4568 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4569 ierr = PetscFree(status);CHKERRQ(ierr); 4570 4571 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4572 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4573 4574 /* insert mat values of mpimat */ 4575 /*----------------------------*/ 4576 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4577 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4578 4579 for (k=0; k<merge->nrecv; k++) { 4580 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4581 nrows = *(buf_ri_k[k]); 4582 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4583 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4584 } 4585 4586 /* set values of ba */ 4587 m = merge->rowmap->n; 4588 for (i=0; i<m; i++) { 4589 arow = owners[rank] + i; 4590 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4591 bnzi = bi[i+1] - bi[i]; 4592 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4593 4594 /* add local non-zero vals of this proc's seqmat into ba */ 4595 anzi = ai[arow+1] - ai[arow]; 4596 aj = a->j + ai[arow]; 4597 aa = a->a + ai[arow]; 4598 nextaj = 0; 4599 for (j=0; nextaj<anzi; j++) { 4600 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4601 ba_i[j] += aa[nextaj++]; 4602 } 4603 } 4604 4605 /* add received vals into ba */ 4606 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4607 /* i-th row */ 4608 if (i == *nextrow[k]) { 4609 anzi = *(nextai[k]+1) - *nextai[k]; 4610 aj = buf_rj[k] + *(nextai[k]); 4611 aa = abuf_r[k] + *(nextai[k]); 4612 nextaj = 0; 4613 for (j=0; nextaj<anzi; j++) { 4614 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4615 ba_i[j] += aa[nextaj++]; 4616 } 4617 } 4618 nextrow[k]++; nextai[k]++; 4619 } 4620 } 4621 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4622 } 4623 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4624 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4625 4626 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4627 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4628 ierr = 
PetscFree(ba_i);CHKERRQ(ierr); 4629 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4630 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4631 PetscFunctionReturn(0); 4632 } 4633 4634 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4635 4636 #undef __FUNCT__ 4637 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4638 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4639 { 4640 PetscErrorCode ierr; 4641 Mat B_mpi; 4642 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4643 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4644 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4645 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4646 PetscInt len,proc,*dnz,*onz,bs,cbs; 4647 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4648 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4649 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4650 MPI_Status *status; 4651 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4652 PetscBT lnkbt; 4653 Mat_Merge_SeqsToMPI *merge; 4654 PetscContainer container; 4655 4656 PetscFunctionBegin; 4657 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4658 4659 /* make sure it is a PETSc comm */ 4660 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4661 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4662 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4663 4664 ierr = PetscNew(&merge);CHKERRQ(ierr); 4665 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4666 4667 /* determine row ownership */ 4668 /*---------------------------------------------------------*/ 4669 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4670 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4671 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4672 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4673 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4674 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4675 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4676 4677 m = merge->rowmap->n; 4678 owners = merge->rowmap->range; 4679 4680 /* determine the number of messages to send, their lengths */ 4681 /*---------------------------------------------------------*/ 4682 len_s = merge->len_s; 4683 4684 len = 0; /* length of buf_si[] */ 4685 merge->nsend = 0; 4686 for (proc=0; proc<size; proc++) { 4687 len_si[proc] = 0; 4688 if (proc == rank) { 4689 len_s[proc] = 0; 4690 } else { 4691 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4692 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4693 } 4694 if (len_s[proc]) { 4695 merge->nsend++; 4696 nrows = 0; 4697 for (i=owners[proc]; i<owners[proc+1]; i++) { 4698 if (ai[i+1] > ai[i]) nrows++; 4699 } 4700 len_si[proc] = 2*(nrows+1); 4701 len += len_si[proc]; 4702 } 4703 } 4704 4705 /* determine the number and length of messages to receive for ij-structure */ 4706 /*-------------------------------------------------------------------------*/ 4707 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4708 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4709 4710 /* post the Irecv of j-structure */ 4711 /*-------------------------------*/ 4712 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4713 ierr = 
PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4714 4715 /* post the Isend of j-structure */ 4716 /*--------------------------------*/ 4717 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4718 4719 for (proc=0, k=0; proc<size; proc++) { 4720 if (!len_s[proc]) continue; 4721 i = owners[proc]; 4722 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4723 k++; 4724 } 4725 4726 /* receives and sends of j-structure are complete */ 4727 /*------------------------------------------------*/ 4728 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4729 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4730 4731 /* send and recv i-structure */ 4732 /*---------------------------*/ 4733 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4734 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4735 4736 ierr = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr); 4737 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4738 for (proc=0,k=0; proc<size; proc++) { 4739 if (!len_s[proc]) continue; 4740 /* form outgoing message for i-structure: 4741 buf_si[0]: nrows to be sent 4742 [1:nrows]: row index (global) 4743 [nrows+1:2*nrows+1]: i-structure index 4744 */ 4745 /*-------------------------------------------*/ 4746 nrows = len_si[proc]/2 - 1; 4747 buf_si_i = buf_si + nrows+1; 4748 buf_si[0] = nrows; 4749 buf_si_i[0] = 0; 4750 nrows = 0; 4751 for (i=owners[proc]; i<owners[proc+1]; i++) { 4752 anzi = ai[i+1] - ai[i]; 4753 if (anzi) { 4754 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4755 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4756 nrows++; 4757 } 4758 } 4759 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4760 k++; 4761 buf_si += len_si[proc]; 4762 } 4763 4764 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4765 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4766 4767 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4768 for (i=0; i<merge->nrecv; i++) { 4769 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4770 } 4771 4772 ierr = PetscFree(len_si);CHKERRQ(ierr); 4773 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4774 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4775 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4776 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4777 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4778 ierr = PetscFree(status);CHKERRQ(ierr); 4779 4780 /* compute a local seq matrix in each processor */ 4781 /*----------------------------------------------*/ 4782 /* allocate bi array and free space for accumulating nonzero column info */ 4783 ierr = PetscMalloc1((m+1),&bi);CHKERRQ(ierr); 4784 bi[0] = 0; 4785 4786 /* create and initialize a linked list */ 4787 nlnk = N+1; 4788 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4789 4790 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4791 len = ai[owners[rank+1]] - ai[owners[rank]]; 4792 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4793 4794 current_space = free_space; 4795 4796 /* determine symbolic info for each local row */ 4797 ierr = 
PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4798 4799 for (k=0; k<merge->nrecv; k++) { 4800 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4801 nrows = *buf_ri_k[k]; 4802 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4803 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4804 } 4805 4806 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4807 len = 0; 4808 for (i=0; i<m; i++) { 4809 bnzi = 0; 4810 /* add local non-zero cols of this proc's seqmat into lnk */ 4811 arow = owners[rank] + i; 4812 anzi = ai[arow+1] - ai[arow]; 4813 aj = a->j + ai[arow]; 4814 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4815 bnzi += nlnk; 4816 /* add received col data into lnk */ 4817 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4818 if (i == *nextrow[k]) { /* i-th row */ 4819 anzi = *(nextai[k]+1) - *nextai[k]; 4820 aj = buf_rj[k] + *nextai[k]; 4821 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4822 bnzi += nlnk; 4823 nextrow[k]++; nextai[k]++; 4824 } 4825 } 4826 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4827 4828 /* if free space is not available, make more free space */ 4829 if (current_space->local_remaining<bnzi) { 4830 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4831 nspacedouble++; 4832 } 4833 /* copy data into free space, then initialize lnk */ 4834 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4835 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4836 4837 current_space->array += bnzi; 4838 current_space->local_used += bnzi; 4839 current_space->local_remaining -= bnzi; 4840 4841 bi[i+1] = bi[i] + bnzi; 4842 } 4843 4844 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4845 4846 ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr); 4847 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4848 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4849 4850 /* create symbolic parallel matrix B_mpi */ 4851 /*---------------------------------------*/ 4852 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4853 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4854 if (n==PETSC_DECIDE) { 4855 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4856 } else { 4857 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4858 } 4859 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4860 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4861 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4862 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4863 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4864 4865 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4866 B_mpi->assembled = PETSC_FALSE; 4867 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4868 merge->bi = bi; 4869 merge->bj = bj; 4870 merge->buf_ri = buf_ri; 4871 merge->buf_rj = buf_rj; 4872 merge->coi = NULL; 4873 merge->coj = NULL; 4874 merge->owners_co = NULL; 4875 4876 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4877 4878 /* attach the supporting struct to B_mpi for reuse */ 4879 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4880 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4881 ierr = 
PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4882 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4883 *mpimat = B_mpi; 4884 4885 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4886 PetscFunctionReturn(0); 4887 } 4888 4889 #undef __FUNCT__ 4890 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4891 /*@C 4892 MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding sequential 4893 matrices from each processor 4894 4895 Collective on MPI_Comm 4896 4897 Input Parameters: 4898 + comm - the communicator the parallel matrix will live on 4899 . seqmat - the input sequential matrix (one per process) 4900 . m - number of local rows (or PETSC_DECIDE) 4901 . n - number of local columns (or PETSC_DECIDE) 4902 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4903 4904 Output Parameter: 4905 . mpimat - the parallel matrix generated 4906 4907 Level: advanced 4908 4909 Notes: 4910 The dimensions of the sequential matrix in each processor MUST be the same. 4911 The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be 4912 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4913 @*/ 4914 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4915 { 4916 PetscErrorCode ierr; 4917 PetscMPIInt size; 4918 4919 PetscFunctionBegin; 4920 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4921 if (size == 1) { 4922 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4923 if (scall == MAT_INITIAL_MATRIX) { 4924 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4925 } else { 4926 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4927 } 4928 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4929 PetscFunctionReturn(0); 4930 } 4931 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4932 if (scall == MAT_INITIAL_MATRIX) { 4933 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4934 } 4935 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4936 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4937 PetscFunctionReturn(0); 4938 } 4939 4940 #undef __FUNCT__ 4941 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4942 /*@ 4943 MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4944 mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4945 with MatGetSize() 4946 4947 Not Collective 4948 4949 Input Parameters: 4950 + A - the matrix 4951 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4952 4953 Output Parameter: 4954 . 
A_loc - the local sequential matrix generated 4955 4956 Level: developer 4957 4958 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4959 4960 @*/ 4961 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4962 { 4963 PetscErrorCode ierr; 4964 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4965 Mat_SeqAIJ *mat,*a,*b; 4966 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4967 MatScalar *aa,*ba,*cam; 4968 PetscScalar *ca; 4969 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4970 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4971 PetscBool match; 4972 4973 PetscFunctionBegin; 4974 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4975 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4976 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4977 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4978 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4979 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4980 aa = a->a; ba = b->a; 4981 if (scall == MAT_INITIAL_MATRIX) { 4982 ierr = PetscMalloc1((1+am),&ci);CHKERRQ(ierr); 4983 ci[0] = 0; 4984 for (i=0; i<am; i++) { 4985 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4986 } 4987 ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr); 4988 ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr); 4989 k = 0; 4990 for (i=0; i<am; i++) { 4991 ncols_o = bi[i+1] - bi[i]; 4992 ncols_d = ai[i+1] - ai[i]; 4993 /* off-diagonal portion of A */ 4994 for (jo=0; jo<ncols_o; jo++) { 4995 col = cmap[*bj]; 4996 if (col >= cstart) break; 4997 cj[k] = col; bj++; 4998 ca[k++] = *ba++; 4999 } 5000 /* diagonal portion of A */ 5001 for (j=0; j<ncols_d; j++) { 5002 cj[k] = cstart + *aj++; 5003 ca[k++] = *aa++; 5004 } 5005 /* off-diagonal portion of A */ 5006 for (j=jo; j<ncols_o; j++) { 5007 cj[k] = cmap[*bj++]; 5008 ca[k++] = *ba++; 5009 } 5010 } 5011 /* put together the new matrix */ 5012 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5013 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5014 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5015 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5016 mat->free_a = PETSC_TRUE; 5017 mat->free_ij = PETSC_TRUE; 5018 mat->nonew = 0; 5019 } else if (scall == MAT_REUSE_MATRIX) { 5020 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5021 ci = mat->i; cj = mat->j; cam = mat->a; 5022 for (i=0; i<am; i++) { 5023 /* off-diagonal portion of A */ 5024 ncols_o = bi[i+1] - bi[i]; 5025 for (jo=0; jo<ncols_o; jo++) { 5026 col = cmap[*bj]; 5027 if (col >= cstart) break; 5028 *cam++ = *ba++; bj++; 5029 } 5030 /* diagonal portion of A */ 5031 ncols_d = ai[i+1] - ai[i]; 5032 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5033 /* off-diagonal portion of A */ 5034 for (j=jo; j<ncols_o; j++) { 5035 *cam++ = *ba++; bj++; 5036 } 5037 } 5038 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5039 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5040 PetscFunctionReturn(0); 5041 } 5042 5043 #undef __FUNCT__ 5044 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5045 /*@C 5046 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5047 5048 Not Collective 5049 5050 Input Parameters: 5051 + A - the matrix 5052 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5053 - row, col - index sets of rows and columns to extract (or NULL) 5054 5055 Output Parameter: 5056 . 
A_loc - the local sequential matrix generated 5057 5058 Level: developer 5059 5060 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5061 5062 @*/ 5063 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5064 { 5065 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5066 PetscErrorCode ierr; 5067 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5068 IS isrowa,iscola; 5069 Mat *aloc; 5070 PetscBool match; 5071 5072 PetscFunctionBegin; 5073 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5074 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5075 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5076 if (!row) { 5077 start = A->rmap->rstart; end = A->rmap->rend; 5078 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5079 } else { 5080 isrowa = *row; 5081 } 5082 if (!col) { 5083 start = A->cmap->rstart; 5084 cmap = a->garray; 5085 nzA = a->A->cmap->n; 5086 nzB = a->B->cmap->n; 5087 ierr = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr); 5088 ncols = 0; 5089 for (i=0; i<nzB; i++) { 5090 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5091 else break; 5092 } 5093 imark = i; 5094 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5095 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5096 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5097 } else { 5098 iscola = *col; 5099 } 5100 if (scall != MAT_INITIAL_MATRIX) { 5101 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5102 aloc[0] = *A_loc; 5103 } 5104 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5105 *A_loc = aloc[0]; 5106 ierr = PetscFree(aloc);CHKERRQ(ierr); 5107 if (!row) { 5108 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5109 } 5110 if (!col) { 5111 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5112 } 5113 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5114 PetscFunctionReturn(0); 5115 } 5116 5117 #undef __FUNCT__ 5118 #define __FUNCT__ "MatGetBrowsOfAcols" 5119 /*@C 5120 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5121 5122 Collective on Mat 5123 5124 Input Parameters: 5125 + A,B - the matrices in mpiaij format 5126 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5127 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5128 5129 Output Parameter: 5130 + rowb, colb - index sets of rows and columns of B to extract 5131 - B_seq - the sequential matrix generated 5132 5133 Level: developer 5134 5135 @*/ 5136 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5137 { 5138 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5139 PetscErrorCode ierr; 5140 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5141 IS isrowb,iscolb; 5142 Mat *bseq=NULL; 5143 5144 PetscFunctionBegin; 5145 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5146 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5147 } 5148 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5149 5150 if (scall == MAT_INITIAL_MATRIX) { 5151 start = A->cmap->rstart; 5152 cmap = a->garray; 5153 nzA = a->A->cmap->n; 5154 nzB = a->B->cmap->n; 5155 ierr = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr); 5156 ncols = 0; 5157 for (i=0; i<nzB; i++) { /* row < local row index */ 5158 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5159 else break; 5160 } 5161 imark = i; 5162 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5163 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5164 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5165 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5166 } else { 5167 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5168 isrowb = *rowb; iscolb = *colb; 5169 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5170 bseq[0] = *B_seq; 5171 } 5172 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5173 *B_seq = bseq[0]; 5174 ierr = PetscFree(bseq);CHKERRQ(ierr); 5175 if (!rowb) { 5176 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5177 } else { 5178 *rowb = isrowb; 5179 } 5180 if (!colb) { 5181 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5182 } else { 5183 *colb = iscolb; 5184 } 5185 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5186 PetscFunctionReturn(0); 5187 } 5188 5189 #undef __FUNCT__ 5190 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5191 /* 5192 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5193 of the OFF-DIAGONAL portion of local A 5194 5195 Collective on Mat 5196 5197 Input Parameters: 5198 + A,B - the matrices in mpiaij format 5199 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5200 5201 Output Parameter: 5202 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5203 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5204 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5205 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5206 5207 Level: developer 5208 5209 */ 5210 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5211 { 5212 VecScatter_MPI_General *gen_to,*gen_from; 5213 PetscErrorCode ierr; 5214 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5215 Mat_SeqAIJ *b_oth; 5216 VecScatter ctx =a->Mvctx; 5217 MPI_Comm comm; 5218 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5219 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5220 PetscScalar *rvalues,*svalues; 5221 MatScalar *b_otha,*bufa,*bufA; 5222 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5223 MPI_Request *rwaits = NULL,*swaits = NULL; 5224 MPI_Status *sstatus,rstatus; 5225 PetscMPIInt jj; 5226 PetscInt *cols,sbs,rbs; 5227 PetscScalar *vals; 5228 5229 PetscFunctionBegin; 5230 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5231 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5232 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5233 } 5234 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5235 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5236 5237 gen_to = (VecScatter_MPI_General*)ctx->todata; 5238 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5239 rvalues = gen_from->values; /* holds the length of receiving row */ 5240 svalues = gen_to->values; /* holds the length of sending row */ 5241 nrecvs = gen_from->n; 5242 nsends = gen_to->n; 5243 5244 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5245 srow = gen_to->indices; /* local row index to be sent */ 5246 sstarts = gen_to->starts; 5247 sprocs = gen_to->procs; 5248 sstatus = gen_to->sstatus; 5249 sbs = gen_to->bs; 5250 rstarts = gen_from->starts; 5251 rprocs = gen_from->procs; 5252 rbs = gen_from->bs; 5253 5254 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5255 if (scall == MAT_INITIAL_MATRIX) { 5256 /* i-array */ 5257 /*---------*/ 5258 /* post receives */ 5259 for (i=0; i<nrecvs; i++) { 5260 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5261 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5262 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5263 } 5264 5265 /* pack the outgoing message */ 5266 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5267 5268 sstartsj[0] = 0; 5269 rstartsj[0] = 0; 5270 len = 0; /* total length of j or a array to be sent */ 5271 k = 0; 5272 for (i=0; i<nsends; i++) { 5273 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5274 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5275 for (j=0; j<nrows; j++) { 5276 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5277 for (l=0; l<sbs; l++) { 5278 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5279 5280 rowlen[j*sbs+l] = ncols; 5281 5282 len += ncols; 5283 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5284 } 5285 k++; 5286 } 5287 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5288 5289 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5290 } 5291 /* recvs and 
sends of i-array are completed */ 5292 i = nrecvs; 5293 while (i--) { 5294 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5295 } 5296 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5297 5298 /* allocate buffers for sending j and a arrays */ 5299 ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr); 5300 ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr); 5301 5302 /* create i-array of B_oth */ 5303 ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr); 5304 5305 b_othi[0] = 0; 5306 len = 0; /* total length of j or a array to be received */ 5307 k = 0; 5308 for (i=0; i<nrecvs; i++) { 5309 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5310 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5311 for (j=0; j<nrows; j++) { 5312 b_othi[k+1] = b_othi[k] + rowlen[j]; 5313 len += rowlen[j]; k++; 5314 } 5315 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5316 } 5317 5318 /* allocate space for j and a arrrays of B_oth */ 5319 ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr); 5320 ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr); 5321 5322 /* j-array */ 5323 /*---------*/ 5324 /* post receives of j-array */ 5325 for (i=0; i<nrecvs; i++) { 5326 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5327 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5328 } 5329 5330 /* pack the outgoing message j-array */ 5331 k = 0; 5332 for (i=0; i<nsends; i++) { 5333 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5334 bufJ = bufj+sstartsj[i]; 5335 for (j=0; j<nrows; j++) { 5336 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5337 for (ll=0; ll<sbs; ll++) { 5338 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5339 for (l=0; l<ncols; l++) { 5340 *bufJ++ = cols[l]; 5341 } 5342 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5343 } 5344 } 5345 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5346 } 5347 5348 /* recvs and sends of j-array are completed */ 5349 i = nrecvs; 5350 while (i--) { 5351 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5352 } 5353 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5354 } else if (scall == MAT_REUSE_MATRIX) { 5355 sstartsj = *startsj_s; 5356 rstartsj = *startsj_r; 5357 bufa = *bufa_ptr; 5358 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5359 b_otha = b_oth->a; 5360 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5361 5362 /* a-array */ 5363 /*---------*/ 5364 /* post receives of a-array */ 5365 for (i=0; i<nrecvs; i++) { 5366 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5367 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5368 } 5369 5370 /* pack the outgoing message a-array */ 5371 k = 0; 5372 for (i=0; i<nsends; i++) { 5373 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5374 bufA = bufa+sstartsj[i]; 5375 for (j=0; j<nrows; j++) { 5376 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5377 for (ll=0; ll<sbs; ll++) { 5378 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5379 for (l=0; l<ncols; l++) { 5380 *bufA++ = vals[l]; 5381 } 5382 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5383 } 5384 } 5385 ierr = 
MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5386 } 5387 /* recvs and sends of a-array are completed */ 5388 i = nrecvs; 5389 while (i--) { 5390 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5391 } 5392 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5393 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5394 5395 if (scall == MAT_INITIAL_MATRIX) { 5396 /* put together the new matrix */ 5397 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5398 5399 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5400 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5401 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5402 b_oth->free_a = PETSC_TRUE; 5403 b_oth->free_ij = PETSC_TRUE; 5404 b_oth->nonew = 0; 5405 5406 ierr = PetscFree(bufj);CHKERRQ(ierr); 5407 if (!startsj_s || !bufa_ptr) { 5408 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5409 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5410 } else { 5411 *startsj_s = sstartsj; 5412 *startsj_r = rstartsj; 5413 *bufa_ptr = bufa; 5414 } 5415 } 5416 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5417 PetscFunctionReturn(0); 5418 } 5419 5420 #undef __FUNCT__ 5421 #define __FUNCT__ "MatGetCommunicationStructs" 5422 /*@C 5423 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5424 5425 Not Collective 5426 5427 Input Parameters: 5428 . A - The matrix in mpiaij format 5429 5430 Output Parameter: 5431 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5432 . colmap - A map from global column index to local index into lvec 5433 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5434 5435 Level: developer 5436 5437 @*/ 5438 #if defined(PETSC_USE_CTABLE) 5439 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5440 #else 5441 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5442 #endif 5443 { 5444 Mat_MPIAIJ *a; 5445 5446 PetscFunctionBegin; 5447 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5448 PetscValidPointer(lvec, 2); 5449 PetscValidPointer(colmap, 3); 5450 PetscValidPointer(multScatter, 4); 5451 a = (Mat_MPIAIJ*) A->data; 5452 if (lvec) *lvec = a->lvec; 5453 if (colmap) *colmap = a->colmap; 5454 if (multScatter) *multScatter = a->Mvctx; 5455 PetscFunctionReturn(0); 5456 } 5457 5458 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5459 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5460 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5461 5462 #undef __FUNCT__ 5463 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5464 /* 5465 Computes (B'*A')' since computing B*A directly is untenable 5466 5467 n p p 5468 ( ) ( ) ( ) 5469 m ( A ) * n ( B ) = m ( C ) 5470 ( ) ( ) ( ) 5471 5472 */ 5473 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5474 { 5475 PetscErrorCode ierr; 5476 Mat At,Bt,Ct; 5477 5478 PetscFunctionBegin; 5479 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5480 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5481 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5482 ierr = MatDestroy(&At);CHKERRQ(ierr); 
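/* Bt is freed next; the product Ct = B^T * A^T is then transposed back into the caller-provided C (C was created by the symbolic stage, hence MAT_REUSE_MATRIX) */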
5483 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5484 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5485 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5486 PetscFunctionReturn(0); 5487 } 5488 5489 #undef __FUNCT__ 5490 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5491 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5492 { 5493 PetscErrorCode ierr; 5494 PetscInt m=A->rmap->n,n=B->cmap->n; 5495 Mat Cmat; 5496 5497 PetscFunctionBegin; 5498 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5499 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5500 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5501 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5502 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5503 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5504 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5505 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5506 5507 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5508 5509 *C = Cmat; 5510 PetscFunctionReturn(0); 5511 } 5512 5513 /* ----------------------------------------------------------------*/ 5514 #undef __FUNCT__ 5515 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5516 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5517 { 5518 PetscErrorCode ierr; 5519 5520 PetscFunctionBegin; 5521 if (scall == MAT_INITIAL_MATRIX) { 5522 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5523 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5524 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5525 } 5526 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5527 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5528 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5529 PetscFunctionReturn(0); 5530 } 5531 5532 #if defined(PETSC_HAVE_MUMPS) 5533 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5534 #endif 5535 #if defined(PETSC_HAVE_PASTIX) 5536 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5537 #endif 5538 #if defined(PETSC_HAVE_SUPERLU_DIST) 5539 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5540 #endif 5541 #if defined(PETSC_HAVE_CLIQUE) 5542 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5543 #endif 5544 5545 /*MC 5546 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5547 5548 Options Database Keys: 5549 . 
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5550 5551 Level: beginner 5552 5553 .seealso: MatCreateAIJ() 5554 M*/ 5555 5556 #undef __FUNCT__ 5557 #define __FUNCT__ "MatCreate_MPIAIJ" 5558 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5559 { 5560 Mat_MPIAIJ *b; 5561 PetscErrorCode ierr; 5562 PetscMPIInt size; 5563 5564 PetscFunctionBegin; 5565 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5566 5567 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5568 B->data = (void*)b; 5569 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5570 B->assembled = PETSC_FALSE; 5571 B->insertmode = NOT_SET_VALUES; 5572 b->size = size; 5573 5574 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5575 5576 /* build cache for off array entries formed */ 5577 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5578 5579 b->donotstash = PETSC_FALSE; 5580 b->colmap = 0; 5581 b->garray = 0; 5582 b->roworiented = PETSC_TRUE; 5583 5584 /* stuff used for matrix vector multiply */ 5585 b->lvec = NULL; 5586 b->Mvctx = NULL; 5587 5588 /* stuff for MatGetRow() */ 5589 b->rowindices = 0; 5590 b->rowvalues = 0; 5591 b->getrowactive = PETSC_FALSE; 5592 5593 /* flexible pointer used in CUSP/CUSPARSE classes */ 5594 b->spptr = NULL; 5595 5596 #if defined(PETSC_HAVE_MUMPS) 5597 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr); 5598 #endif 5599 #if defined(PETSC_HAVE_PASTIX) 5600 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5601 #endif 5602 #if defined(PETSC_HAVE_SUPERLU_DIST) 5603 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5604 #endif 5605 #if defined(PETSC_HAVE_CLIQUE) 5606 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr); 5607 #endif 5608 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5609 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5610 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5611 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5612 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5613 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5614 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5615 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5616 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5617 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5618 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5619 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5620 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5621 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5622 PetscFunctionReturn(0); 5623 } 5624 5625 #undef __FUNCT__ 5626 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5627 /*@ 5628 MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal" 5629 and "off-diagonal" part of the matrix in CSR format. 5630 5631 Collective on MPI_Comm 5632 5633 Input Parameters: 5634 + comm - MPI communicator 5635 . m - number of local rows (Cannot be PETSC_DECIDE) 5636 . n - This value should be the same as the local size used in creating the 5637 x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have 5638 it calculated if N is given). For square matrices n is almost always m. 5639 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given) 5640 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given) 5641 . i - row indices for "diagonal" portion of matrix 5642 . j - column indices 5643 . a - matrix values 5644 . oi - row indices for "off-diagonal" portion of matrix 5645 . oj - column indices 5646 - oa - matrix values 5647 5648 Output Parameter: 5649 . mat - the matrix 5650 5651 Level: advanced 5652 5653 Notes: 5654 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5655 must free the arrays once the matrix has been destroyed and not before. 5656 5657 The i and j indices are 0-based. 5658 5659 See MatCreateAIJ() for the definition of the "diagonal" and "off-diagonal" portions of the matrix. 5660 5661 This sets local rows and cannot be used to set off-processor values. 5662 5663 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5664 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5665 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5666 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5667 keep track of the underlying arrays. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5668 communication if it is known that only local entries will be set.
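
   A minimal sketch of the expected call sequence; comm, m, n, the CSR arrays i, j, a, oi, oj, oa, and the
   vectors x and y are placeholders the caller provides, and per the Notes above the six arrays may be freed
   only after the matrix has been destroyed:
.vb
   ierr = MatCreateMPIAIJWithSplitArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
   ierr = MatMult(A,x,y);CHKERRQ(ierr);
   ierr = MatDestroy(&A);CHKERRQ(ierr);
.ve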
5669 5670 .keywords: matrix, aij, compressed row, sparse, parallel 5671 5672 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5673 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5674 @*/ 5675 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5676 { 5677 PetscErrorCode ierr; 5678 Mat_MPIAIJ *maij; 5679 5680 PetscFunctionBegin; 5681 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5682 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5683 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5684 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5685 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5686 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5687 maij = (Mat_MPIAIJ*) (*mat)->data; 5688 5689 (*mat)->preallocated = PETSC_TRUE; 5690 5691 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5692 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5693 5694 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5695 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5696 5697 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5698 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5699 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5700 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5701 5702 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5703 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5704 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5705 PetscFunctionReturn(0); 5706 } 5707 5708 /* 5709 Special version for direct calls from Fortran 5710 */ 5711 #include <petsc-private/fortranimpl.h> 5712 5713 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5714 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5715 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5716 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5717 #endif 5718 5719 /* Change these macros so can be used in void function */ 5720 #undef CHKERRQ 5721 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5722 #undef SETERRQ2 5723 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5724 #undef SETERRQ3 5725 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5726 #undef SETERRQ 5727 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5728 5729 #undef __FUNCT__ 5730 #define __FUNCT__ "matsetvaluesmpiaij_" 5731 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5732 { 5733 Mat mat = *mmat; 5734 PetscInt m = *mm, n = *mn; 5735 InsertMode addv = *maddv; 5736 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5737 PetscScalar value; 5738 PetscErrorCode ierr; 5739 5740 MatCheckPreallocated(mat,1); 5741 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5742 5743 #if defined(PETSC_USE_DEBUG) 5744 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5745 #endif 5746 { 5747 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5748 
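/* ownership ranges of this process: rows [rstart,rend) above, columns [cstart,cend) below */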
PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5749 PetscBool roworiented = aij->roworiented; 5750 5751 /* Some Variables required in the macro */ 5752 Mat A = aij->A; 5753 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5754 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5755 MatScalar *aa = a->a; 5756 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5757 Mat B = aij->B; 5758 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5759 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5760 MatScalar *ba = b->a; 5761 5762 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5763 PetscInt nonew = a->nonew; 5764 MatScalar *ap1,*ap2; 5765 5766 PetscFunctionBegin; 5767 for (i=0; i<m; i++) { 5768 if (im[i] < 0) continue; 5769 #if defined(PETSC_USE_DEBUG) 5770 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5771 #endif 5772 if (im[i] >= rstart && im[i] < rend) { 5773 row = im[i] - rstart; 5774 lastcol1 = -1; 5775 rp1 = aj + ai[row]; 5776 ap1 = aa + ai[row]; 5777 rmax1 = aimax[row]; 5778 nrow1 = ailen[row]; 5779 low1 = 0; 5780 high1 = nrow1; 5781 lastcol2 = -1; 5782 rp2 = bj + bi[row]; 5783 ap2 = ba + bi[row]; 5784 rmax2 = bimax[row]; 5785 nrow2 = bilen[row]; 5786 low2 = 0; 5787 high2 = nrow2; 5788 5789 for (j=0; j<n; j++) { 5790 if (roworiented) value = v[i*n+j]; 5791 else value = v[i+j*m]; 5792 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5793 if (in[j] >= cstart && in[j] < cend) { 5794 col = in[j] - cstart; 5795 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5796 } else if (in[j] < 0) continue; 5797 #if defined(PETSC_USE_DEBUG) 5798 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5799 #endif 5800 else { 5801 if (mat->was_assembled) { 5802 if (!aij->colmap) { 5803 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5804 } 5805 #if defined(PETSC_USE_CTABLE) 5806 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5807 col--; 5808 #else 5809 col = aij->colmap[in[j]] - 1; 5810 #endif 5811 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5812 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5813 col = in[j]; 5814 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5815 B = aij->B; 5816 b = (Mat_SeqAIJ*)B->data; 5817 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5818 rp2 = bj + bi[row]; 5819 ap2 = ba + bi[row]; 5820 rmax2 = bimax[row]; 5821 nrow2 = bilen[row]; 5822 low2 = 0; 5823 high2 = nrow2; 5824 bm = aij->B->rmap->n; 5825 ba = b->a; 5826 } 5827 } else col = in[j]; 5828 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5829 } 5830 } 5831 } else if (!aij->donotstash) { 5832 if (roworiented) { 5833 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5834 } else { 5835 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5836 } 5837 } 5838 } 5839 } 5840 PetscFunctionReturnVoid(); 5841 } 5842 5843
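
/*
   Usage sketch for MatCreateMPIAIJSumSeqAIJ() (defined earlier in this file). This is an
   illustrative fragment, not part of the PETSc test suite: the 4x4 size and the unit values
   are made up. Every process builds a sequential AIJ matrix of the full global dimensions and
   the contributions are summed into one parallel MPIAIJ matrix. Per the manual page above,
   seqmat becomes tied to the resulting matrix and is destroyed together with it.

   static PetscErrorCode BuildSummedMPIAIJ(MPI_Comm comm,Mat *C)
   {
     PetscErrorCode ierr;
     Mat            seqmat;
     PetscInt       row;

     PetscFunctionBegin;
     ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,4,4,1,NULL,&seqmat);CHKERRQ(ierr);
     for (row=0; row<4; row++) {
       ierr = MatSetValue(seqmat,row,row,1.0,ADD_VALUES);CHKERRQ(ierr);
     }
     ierr = MatAssemblyBegin(seqmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(seqmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);
     PetscFunctionReturn(0);
   }
*/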