1 2 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 3 #include <petsc-private/vecimpl.h> 4 #include <petscblaslapack.h> 5 #include <petscsf.h> 6 7 /*MC 8 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 9 10 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 11 and MATMPIAIJ otherwise. As a result, for single process communicators, 12 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 13 for communicators controlling multiple processes. It is recommended that you call both of 14 the above preallocation routines for simplicity. 15 16 Options Database Keys: 17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 18 19 Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when 20 enough exist. 21 22 Level: beginner 23 24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ 25 M*/ 26 27 /*MC 28 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 29 30 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 31 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 32 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 33 for communicators controlling multiple processes. It is recommended that you call both of 34 the above preallocation routines for simplicity. 35 36 Options Database Keys: 37 . 
-mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 38 39 Level: beginner 40 41 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 42 M*/ 43 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ" 46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 47 { 48 PetscErrorCode ierr; 49 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 50 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 51 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 52 const PetscInt *ia,*ib; 53 const MatScalar *aa,*bb; 54 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 55 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 56 57 PetscFunctionBegin; 58 *keptrows = 0; 59 ia = a->i; 60 ib = b->i; 61 for (i=0; i<m; i++) { 62 na = ia[i+1] - ia[i]; 63 nb = ib[i+1] - ib[i]; 64 if (!na && !nb) { 65 cnt++; 66 goto ok1; 67 } 68 aa = a->a + ia[i]; 69 for (j=0; j<na; j++) { 70 if (aa[j] != 0.0) goto ok1; 71 } 72 bb = b->a + ib[i]; 73 for (j=0; j <nb; j++) { 74 if (bb[j] != 0.0) goto ok1; 75 } 76 cnt++; 77 ok1:; 78 } 79 ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 80 if (!n0rows) PetscFunctionReturn(0); 81 ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr); 82 cnt = 0; 83 for (i=0; i<m; i++) { 84 na = ia[i+1] - ia[i]; 85 nb = ib[i+1] - ib[i]; 86 if (!na && !nb) continue; 87 aa = a->a + ia[i]; 88 for (j=0; j<na;j++) { 89 if (aa[j] != 0.0) { 90 rows[cnt++] = rstart + i; 91 goto ok2; 92 } 93 } 94 bb = b->a + ib[i]; 95 for (j=0; j<nb; j++) { 96 if (bb[j] != 0.0) { 97 rows[cnt++] = rstart + i; 98 goto ok2; 99 } 100 } 101 ok2:; 102 } 103 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 104 PetscFunctionReturn(0); 105 } 106 107 #undef __FUNCT__ 108 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ" 109 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 110 { 111 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 112 PetscErrorCode ierr; 113 
PetscInt i,rstart,nrows,*rows; 114 115 PetscFunctionBegin; 116 *zrows = NULL; 117 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 118 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 119 for (i=0; i<nrows; i++) rows[i] += rstart; 120 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 121 PetscFunctionReturn(0); 122 } 123 124 #undef __FUNCT__ 125 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 126 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 127 { 128 PetscErrorCode ierr; 129 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 130 PetscInt i,n,*garray = aij->garray; 131 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 132 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 133 PetscReal *work; 134 135 PetscFunctionBegin; 136 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 137 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 138 if (type == NORM_2) { 139 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 140 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 141 } 142 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 143 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 144 } 145 } else if (type == NORM_1) { 146 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 147 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 148 } 149 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 150 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 151 } 152 } else if (type == NORM_INFINITY) { 153 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 154 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 155 } 156 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 157 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 158 } 159 160 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 161 if (type == 
NORM_INFINITY) { 162 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 163 } else { 164 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 165 } 166 ierr = PetscFree(work);CHKERRQ(ierr); 167 if (type == NORM_2) { 168 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 169 } 170 PetscFunctionReturn(0); 171 } 172 173 #undef __FUNCT__ 174 #define __FUNCT__ "MatDistribute_MPIAIJ" 175 /* 176 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 177 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 178 179 Only for square matrices 180 181 Used by a preconditioner, hence PETSC_EXTERN 182 */ 183 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 184 { 185 PetscMPIInt rank,size; 186 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 187 PetscErrorCode ierr; 188 Mat mat; 189 Mat_SeqAIJ *gmata; 190 PetscMPIInt tag; 191 MPI_Status status; 192 PetscBool aij; 193 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 194 195 PetscFunctionBegin; 196 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 197 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 198 if (!rank) { 199 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 200 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 201 } 202 if (reuse == MAT_INITIAL_MATRIX) { 203 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 204 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 205 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 206 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 207 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 208 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 209 ierr = 
PetscMalloc1((size+1),&rowners);CHKERRQ(ierr); 210 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 211 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 212 213 rowners[0] = 0; 214 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 215 rstart = rowners[rank]; 216 rend = rowners[rank+1]; 217 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 218 if (!rank) { 219 gmata = (Mat_SeqAIJ*) gmat->data; 220 /* send row lengths to all processors */ 221 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 222 for (i=1; i<size; i++) { 223 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 224 } 225 /* determine number diagonal and off-diagonal counts */ 226 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 227 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 228 jj = 0; 229 for (i=0; i<m; i++) { 230 for (j=0; j<dlens[i]; j++) { 231 if (gmata->j[jj] < rstart) ld[i]++; 232 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 233 jj++; 234 } 235 } 236 /* send column indices to other processes */ 237 for (i=1; i<size; i++) { 238 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 239 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 240 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 241 } 242 243 /* send numerical values to other processes */ 244 for (i=1; i<size; i++) { 245 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 246 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 247 } 248 gmataa = gmata->a; 249 gmataj = gmata->j; 250 251 } else { 252 /* receive row lengths */ 253 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 254 /* receive column indices */ 255 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 256 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 257 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 258 /* 
determine number diagonal and off-diagonal counts */ 259 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 260 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 261 jj = 0; 262 for (i=0; i<m; i++) { 263 for (j=0; j<dlens[i]; j++) { 264 if (gmataj[jj] < rstart) ld[i]++; 265 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 266 jj++; 267 } 268 } 269 /* receive numerical values */ 270 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 271 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 272 } 273 /* set preallocation */ 274 for (i=0; i<m; i++) { 275 dlens[i] -= olens[i]; 276 } 277 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 278 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 279 280 for (i=0; i<m; i++) { 281 dlens[i] += olens[i]; 282 } 283 cnt = 0; 284 for (i=0; i<m; i++) { 285 row = rstart + i; 286 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 287 cnt += dlens[i]; 288 } 289 if (rank) { 290 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 291 } 292 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 293 ierr = PetscFree(rowners);CHKERRQ(ierr); 294 295 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 296 297 *inmat = mat; 298 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 299 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 300 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 301 mat = *inmat; 302 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 303 if (!rank) { 304 /* send numerical values to other processes */ 305 gmata = (Mat_SeqAIJ*) gmat->data; 306 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 307 gmataa = gmata->a; 308 for (i=1; i<size; i++) { 309 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 310 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 311 } 312 nz = 
gmata->i[rowners[1]]-gmata->i[rowners[0]]; 313 } else { 314 /* receive numerical values from process 0*/ 315 nz = Ad->nz + Ao->nz; 316 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 317 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 318 } 319 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 320 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 321 ad = Ad->a; 322 ao = Ao->a; 323 if (mat->rmap->n) { 324 i = 0; 325 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 326 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 327 } 328 for (i=1; i<mat->rmap->n; i++) { 329 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 330 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 331 } 332 i--; 333 if (mat->rmap->n) { 334 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 335 } 336 if (rank) { 337 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 338 } 339 } 340 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 341 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 342 PetscFunctionReturn(0); 343 } 344 345 /* 346 Local utility routine that creates a mapping from the global column 347 number to the local number in the off-diagonal part of the local 348 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 349 a slightly higher hash table cost; without it it is not scalable (each processor 350 has an order N integer array but is fast to acess. 
351 */ 352 #undef __FUNCT__ 353 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 354 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 355 { 356 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 357 PetscErrorCode ierr; 358 PetscInt n = aij->B->cmap->n,i; 359 360 PetscFunctionBegin; 361 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 362 #if defined(PETSC_USE_CTABLE) 363 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 364 for (i=0; i<n; i++) { 365 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 366 } 367 #else 368 ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr); 369 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 370 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 371 #endif 372 PetscFunctionReturn(0); 373 } 374 375 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 376 { \ 377 if (col <= lastcol1) low1 = 0; \ 378 else high1 = nrow1; \ 379 lastcol1 = col;\ 380 while (high1-low1 > 5) { \ 381 t = (low1+high1)/2; \ 382 if (rp1[t] > col) high1 = t; \ 383 else low1 = t; \ 384 } \ 385 for (_i=low1; _i<high1; _i++) { \ 386 if (rp1[_i] > col) break; \ 387 if (rp1[_i] == col) { \ 388 if (addv == ADD_VALUES) ap1[_i] += value; \ 389 else ap1[_i] = value; \ 390 goto a_noinsert; \ 391 } \ 392 } \ 393 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 394 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 395 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 396 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 397 N = nrow1++ - 1; a->nz++; high1++; \ 398 /* shift up all the later entries in this row */ \ 399 for (ii=N; ii>=_i; ii--) { \ 400 rp1[ii+1] = rp1[ii]; \ 401 ap1[ii+1] = ap1[ii]; \ 402 } \ 403 rp1[_i] = col; \ 
404 ap1[_i] = value; \ 405 A->nonzerostate++;\ 406 a_noinsert: ; \ 407 ailen[row] = nrow1; \ 408 } 409 410 411 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 412 { \ 413 if (col <= lastcol2) low2 = 0; \ 414 else high2 = nrow2; \ 415 lastcol2 = col; \ 416 while (high2-low2 > 5) { \ 417 t = (low2+high2)/2; \ 418 if (rp2[t] > col) high2 = t; \ 419 else low2 = t; \ 420 } \ 421 for (_i=low2; _i<high2; _i++) { \ 422 if (rp2[_i] > col) break; \ 423 if (rp2[_i] == col) { \ 424 if (addv == ADD_VALUES) ap2[_i] += value; \ 425 else ap2[_i] = value; \ 426 goto b_noinsert; \ 427 } \ 428 } \ 429 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 430 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 431 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 432 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 433 N = nrow2++ - 1; b->nz++; high2++; \ 434 /* shift up all the later entries in this row */ \ 435 for (ii=N; ii>=_i; ii--) { \ 436 rp2[ii+1] = rp2[ii]; \ 437 ap2[ii+1] = ap2[ii]; \ 438 } \ 439 rp2[_i] = col; \ 440 ap2[_i] = value; \ 441 B->nonzerostate++; \ 442 b_noinsert: ; \ 443 bilen[row] = nrow2; \ 444 } 445 446 #undef __FUNCT__ 447 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 448 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 449 { 450 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 451 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 452 PetscErrorCode ierr; 453 PetscInt l,*garray = mat->garray,diag; 454 455 PetscFunctionBegin; 456 /* code only works for square matrices A */ 457 458 /* find size of row to the left of the diagonal part */ 459 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 460 row = row - diag; 461 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 462 if (garray[b->j[b->i[row]+l]] > diag) break; 463 } 464 ierr = 
PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 465 466 /* diagonal part */ 467 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 468 469 /* right of diagonal part */ 470 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 471 PetscFunctionReturn(0); 472 } 473 474 #undef __FUNCT__ 475 #define __FUNCT__ "MatSetValues_MPIAIJ" 476 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 477 { 478 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 479 PetscScalar value; 480 PetscErrorCode ierr; 481 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 482 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 483 PetscBool roworiented = aij->roworiented; 484 485 /* Some Variables required in the macro */ 486 Mat A = aij->A; 487 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 488 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 489 MatScalar *aa = a->a; 490 PetscBool ignorezeroentries = a->ignorezeroentries; 491 Mat B = aij->B; 492 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 493 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 494 MatScalar *ba = b->a; 495 496 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 497 PetscInt nonew; 498 MatScalar *ap1,*ap2; 499 500 PetscFunctionBegin; 501 for (i=0; i<m; i++) { 502 if (im[i] < 0) continue; 503 #if defined(PETSC_USE_DEBUG) 504 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 505 #endif 506 if (im[i] >= rstart && im[i] < rend) { 507 row = im[i] - rstart; 508 lastcol1 = -1; 509 rp1 = aj + ai[row]; 510 ap1 = aa + ai[row]; 511 rmax1 = aimax[row]; 512 nrow1 = ailen[row]; 513 low1 = 0; 514 high1 = nrow1; 515 
lastcol2 = -1; 516 rp2 = bj + bi[row]; 517 ap2 = ba + bi[row]; 518 rmax2 = bimax[row]; 519 nrow2 = bilen[row]; 520 low2 = 0; 521 high2 = nrow2; 522 523 for (j=0; j<n; j++) { 524 if (roworiented) value = v[i*n+j]; 525 else value = v[i+j*m]; 526 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 527 if (in[j] >= cstart && in[j] < cend) { 528 col = in[j] - cstart; 529 nonew = a->nonew; 530 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 531 } else if (in[j] < 0) continue; 532 #if defined(PETSC_USE_DEBUG) 533 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 534 #endif 535 else { 536 if (mat->was_assembled) { 537 if (!aij->colmap) { 538 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 539 } 540 #if defined(PETSC_USE_CTABLE) 541 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 542 col--; 543 #else 544 col = aij->colmap[in[j]] - 1; 545 #endif 546 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 547 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 548 col = in[j]; 549 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 550 B = aij->B; 551 b = (Mat_SeqAIJ*)B->data; 552 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 553 rp2 = bj + bi[row]; 554 ap2 = ba + bi[row]; 555 rmax2 = bimax[row]; 556 nrow2 = bilen[row]; 557 low2 = 0; 558 high2 = nrow2; 559 bm = aij->B->rmap->n; 560 ba = b->a; 561 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 562 } else col = in[j]; 563 nonew = b->nonew; 564 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 565 } 566 } 567 } else { 568 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 569 if (!aij->donotstash) { 570 mat->assembled = PETSC_FALSE; 571 if 
(roworiented) { 572 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 573 } else { 574 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 575 } 576 } 577 } 578 } 579 PetscFunctionReturn(0); 580 } 581 582 #undef __FUNCT__ 583 #define __FUNCT__ "MatGetValues_MPIAIJ" 584 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 585 { 586 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 587 PetscErrorCode ierr; 588 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 589 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 590 591 PetscFunctionBegin; 592 for (i=0; i<m; i++) { 593 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 594 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 595 if (idxm[i] >= rstart && idxm[i] < rend) { 596 row = idxm[i] - rstart; 597 for (j=0; j<n; j++) { 598 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 599 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 600 if (idxn[j] >= cstart && idxn[j] < cend) { 601 col = idxn[j] - cstart; 602 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 603 } else { 604 if (!aij->colmap) { 605 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 606 } 607 #if defined(PETSC_USE_CTABLE) 608 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 609 col--; 610 #else 611 col = aij->colmap[idxn[j]] - 1; 612 #endif 613 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 614 else { 615 ierr = 
MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 616 } 617 } 618 } 619 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 620 } 621 PetscFunctionReturn(0); 622 } 623 624 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 625 626 #undef __FUNCT__ 627 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 628 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 629 { 630 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 631 PetscErrorCode ierr; 632 PetscInt nstash,reallocs; 633 InsertMode addv; 634 635 PetscFunctionBegin; 636 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 637 638 /* make sure all processors are either in INSERTMODE or ADDMODE */ 639 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 640 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 641 mat->insertmode = addv; /* in case this processor had no cache */ 642 643 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 644 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 645 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 646 PetscFunctionReturn(0); 647 } 648 649 #undef __FUNCT__ 650 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 651 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 652 { 653 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 654 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 655 PetscErrorCode ierr; 656 PetscMPIInt n; 657 PetscInt i,j,rstart,ncols,flg; 658 PetscInt *row,*col; 659 PetscBool other_disassembled; 660 PetscScalar *val; 661 InsertMode addv = mat->insertmode; 662 663 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 664 665 PetscFunctionBegin; 666 if (!aij->donotstash && 
!mat->nooffprocentries) { 667 while (1) { 668 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 669 if (!flg) break; 670 671 for (i=0; i<n; ) { 672 /* Now identify the consecutive vals belonging to the same row */ 673 for (j=i,rstart=row[j]; j<n; j++) { 674 if (row[j] != rstart) break; 675 } 676 if (j < n) ncols = j-i; 677 else ncols = n-i; 678 /* Now assemble all these values with a single function call */ 679 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 680 681 i = j; 682 } 683 } 684 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 685 } 686 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 687 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 688 689 /* determine if any processor has disassembled, if so we must 690 also disassemble ourselfs, in order that we may reassemble. */ 691 /* 692 if nonzero structure of submatrix B cannot change then we know that 693 no processor disassembled thus we can skip this stuff 694 */ 695 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 696 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 697 if (mat->was_assembled && !other_disassembled) { 698 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 699 } 700 } 701 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 702 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 703 } 704 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 705 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 706 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 707 708 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 709 710 aij->rowvalues = 0; 711 712 /* used by MatAXPY() */ 713 a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 714 a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 715 716 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 717 if (a->inode.size) 
mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 718 719 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 720 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 721 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 722 ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 723 } 724 PetscFunctionReturn(0); 725 } 726 727 #undef __FUNCT__ 728 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 729 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 730 { 731 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 732 PetscErrorCode ierr; 733 734 PetscFunctionBegin; 735 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 736 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 737 PetscFunctionReturn(0); 738 } 739 740 #undef __FUNCT__ 741 #define __FUNCT__ "MatZeroRows_MPIAIJ" 742 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 743 { 744 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 745 PetscInt *owners = A->rmap->range; 746 PetscInt n = A->rmap->n; 747 PetscSF sf; 748 PetscInt *lrows; 749 PetscSFNode *rrows; 750 PetscInt r, p = 0, len = 0; 751 PetscErrorCode ierr; 752 753 PetscFunctionBegin; 754 /* Create SF where leaves are input rows and roots are owned rows */ 755 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 756 for (r = 0; r < n; ++r) lrows[r] = -1; 757 if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);} 758 for (r = 0; r < N; ++r) { 759 const PetscInt idx = rows[r]; 760 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 761 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 762 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 763 } 764 if (A->nooffproczerorows) { 765 if (p != mat->rank) 
SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,mat->rank); 766 lrows[len++] = idx - owners[p]; 767 } else { 768 rrows[r].rank = p; 769 rrows[r].index = rows[r] - owners[p]; 770 } 771 } 772 if (!A->nooffproczerorows) { 773 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 774 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 775 /* Collect flags for rows to be zeroed */ 776 ierr = PetscSFReduceBegin(sf, MPIU_INT, rows, lrows, MPI_LOR);CHKERRQ(ierr); 777 ierr = PetscSFReduceEnd(sf, MPIU_INT, rows, lrows, MPI_LOR);CHKERRQ(ierr); 778 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 779 /* Compress and put in row numbers */ 780 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 781 } 782 /* fix right hand side if needed */ 783 if (x && b) { 784 const PetscScalar *xx; 785 PetscScalar *bb; 786 787 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 788 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 789 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 790 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 791 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 792 } 793 /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/ 794 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 795 if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) { 796 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 797 } else if (diag != 0.0) { 798 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 799 if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 800 for (r = 0; r < len; ++r) { 801 const PetscInt row = lrows[r] + A->rmap->rstart; 802 ierr = MatSetValues(A, 1, 
&row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 803 } 804 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 805 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 806 } else { 807 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 808 } 809 ierr = PetscFree(lrows);CHKERRQ(ierr); 810 811 /* only change matrix nonzero state if pattern was allowed to be changed */ 812 if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) { 813 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 814 ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 815 } 816 PetscFunctionReturn(0); 817 } 818 819 #undef __FUNCT__ 820 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 821 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 822 { 823 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 824 PetscErrorCode ierr; 825 PetscMPIInt size = l->size,n = A->rmap->n,lastidx = -1; 826 PetscInt i,j,r,m,p = 0,len = 0; 827 PetscInt *lrows,*owners = A->rmap->range; 828 PetscSFNode *rrows; 829 PetscSF sf; 830 const PetscScalar *xx; 831 PetscScalar *bb,*mask; 832 Vec xmask,lmask; 833 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 834 const PetscInt *aj, *ii,*ridx; 835 PetscScalar *aa; 836 #if defined(PETSC_DEBUG) 837 PetscBool found = PETSC_FALSE; 838 #endif 839 840 PetscFunctionBegin; 841 /* Create SF where leaves are input rows and roots are owned rows */ 842 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 843 for (r = 0; r < n; ++r) lrows[r] = -1; 844 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 845 for (r = 0; r < N; ++r) { 846 const PetscInt idx = rows[r]; 847 PetscBool found = PETSC_FALSE; 848 /* Trick for efficient searching for sorted rows */ 849 if (lastidx > idx) p = 0; 850 lastidx = idx; 851 for (; p < size; ++p) { 852 if (idx >= owners[p] && idx < owners[p+1]) { 853 rrows[r].rank = p; 854 rrows[r].index = rows[r] - owners[p]; 855 found 
= PETSC_TRUE; 856 break; 857 } 858 } 859 if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx); 860 } 861 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 862 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 863 /* Collect flags for rows to be zeroed */ 864 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 865 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 866 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 867 /* Compress and put in row numbers */ 868 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 869 /* zero diagonal part of matrix */ 870 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 871 /* handle off diagonal part of matrix */ 872 ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr); 873 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 874 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 875 for (i=0; i<len; i++) bb[lrows[i]] = 1; 876 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 877 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 878 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 879 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 880 if (x) { 881 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 882 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 883 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 884 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 885 } 886 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 887 /* remove zeroed rows of off diagonal matrix */ 888 ii = aij->i; 889 for (i=0; i<len; i++) { 890 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 891 } 892 /* loop over all elements of off process part of matrix zeroing 
removed columns*/ 893 if (aij->compressedrow.use) { 894 m = aij->compressedrow.nrows; 895 ii = aij->compressedrow.i; 896 ridx = aij->compressedrow.rindex; 897 for (i=0; i<m; i++) { 898 n = ii[i+1] - ii[i]; 899 aj = aij->j + ii[i]; 900 aa = aij->a + ii[i]; 901 902 for (j=0; j<n; j++) { 903 if (PetscAbsScalar(mask[*aj])) { 904 if (b) bb[*ridx] -= *aa*xx[*aj]; 905 *aa = 0.0; 906 } 907 aa++; 908 aj++; 909 } 910 ridx++; 911 } 912 } else { /* do not use compressed row format */ 913 m = l->B->rmap->n; 914 for (i=0; i<m; i++) { 915 n = ii[i+1] - ii[i]; 916 aj = aij->j + ii[i]; 917 aa = aij->a + ii[i]; 918 for (j=0; j<n; j++) { 919 if (PetscAbsScalar(mask[*aj])) { 920 if (b) bb[i] -= *aa*xx[*aj]; 921 *aa = 0.0; 922 } 923 aa++; 924 aj++; 925 } 926 } 927 } 928 if (x) { 929 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 930 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 931 } 932 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 933 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 934 ierr = PetscFree(lrows);CHKERRQ(ierr); 935 936 /* only change matrix nonzero state if pattern was allowed to be changed */ 937 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 938 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 939 ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 940 } 941 PetscFunctionReturn(0); 942 } 943 944 #undef __FUNCT__ 945 #define __FUNCT__ "MatMult_MPIAIJ" 946 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 947 { 948 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 949 PetscErrorCode ierr; 950 PetscInt nt; 951 952 PetscFunctionBegin; 953 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 954 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 955 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 956 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 957 ierr = 
VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 958 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 959 PetscFunctionReturn(0); 960 } 961 962 #undef __FUNCT__ 963 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 964 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 965 { 966 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 967 PetscErrorCode ierr; 968 969 PetscFunctionBegin; 970 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 971 PetscFunctionReturn(0); 972 } 973 974 #undef __FUNCT__ 975 #define __FUNCT__ "MatMultAdd_MPIAIJ" 976 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 977 { 978 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 979 PetscErrorCode ierr; 980 981 PetscFunctionBegin; 982 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 983 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 984 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 985 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 986 PetscFunctionReturn(0); 987 } 988 989 #undef __FUNCT__ 990 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 991 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 992 { 993 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 994 PetscErrorCode ierr; 995 PetscBool merged; 996 997 PetscFunctionBegin; 998 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 999 /* do nondiagonal part */ 1000 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1001 if (!merged) { 1002 /* send it on its way */ 1003 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1004 /* do local part */ 1005 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1006 /* receive remote parts: note this assumes the values are not actually */ 1007 /* added in yy until the next line, */ 1008 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1009 } else { 1010 /* do 
local part */ 1011 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1012 /* send it on its way */ 1013 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1014 /* values actually were received in the Begin() but we need to call this nop */ 1015 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1016 } 1017 PetscFunctionReturn(0); 1018 } 1019 1020 #undef __FUNCT__ 1021 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1022 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1023 { 1024 MPI_Comm comm; 1025 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1026 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1027 IS Me,Notme; 1028 PetscErrorCode ierr; 1029 PetscInt M,N,first,last,*notme,i; 1030 PetscMPIInt size; 1031 1032 PetscFunctionBegin; 1033 /* Easy test: symmetric diagonal block */ 1034 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1035 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1036 if (!*f) PetscFunctionReturn(0); 1037 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1038 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1039 if (size == 1) PetscFunctionReturn(0); 1040 1041 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. 
*/ 1042 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1043 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1044 ierr = PetscMalloc1((N-last+first),¬me);CHKERRQ(ierr); 1045 for (i=0; i<first; i++) notme[i] = i; 1046 for (i=last; i<M; i++) notme[i-last+first] = i; 1047 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1048 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1049 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1050 Aoff = Aoffs[0]; 1051 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1052 Boff = Boffs[0]; 1053 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1054 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1055 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1056 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1057 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1058 ierr = PetscFree(notme);CHKERRQ(ierr); 1059 PetscFunctionReturn(0); 1060 } 1061 1062 #undef __FUNCT__ 1063 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1064 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1065 { 1066 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1067 PetscErrorCode ierr; 1068 1069 PetscFunctionBegin; 1070 /* do nondiagonal part */ 1071 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1072 /* send it on its way */ 1073 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1074 /* do local part */ 1075 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1076 /* receive remote parts */ 1077 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1078 PetscFunctionReturn(0); 1079 } 1080 1081 /* 1082 This only works correctly for square matrices where the subblock A->A is the 1083 diagonal block 1084 */ 1085 #undef __FUNCT__ 1086 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1087 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat 
A,Vec v) 1088 { 1089 PetscErrorCode ierr; 1090 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1091 1092 PetscFunctionBegin; 1093 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1094 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1095 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1096 PetscFunctionReturn(0); 1097 } 1098 1099 #undef __FUNCT__ 1100 #define __FUNCT__ "MatScale_MPIAIJ" 1101 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1102 { 1103 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1104 PetscErrorCode ierr; 1105 1106 PetscFunctionBegin; 1107 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1108 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1109 PetscFunctionReturn(0); 1110 } 1111 1112 #undef __FUNCT__ 1113 #define __FUNCT__ "MatDestroy_Redundant" 1114 PetscErrorCode MatDestroy_Redundant(Mat_Redundant **redundant) 1115 { 1116 PetscErrorCode ierr; 1117 Mat_Redundant *redund = *redundant; 1118 PetscInt i; 1119 1120 PetscFunctionBegin; 1121 *redundant = NULL; 1122 if (redund){ 1123 if (redund->matseq) { /* via MatGetSubMatrices() */ 1124 ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr); 1125 ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr); 1126 ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr); 1127 ierr = PetscFree(redund->matseq);CHKERRQ(ierr); 1128 } else { 1129 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 1130 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 1131 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 1132 for (i=0; i<redund->nrecvs; i++) { 1133 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 1134 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 1135 } 1136 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 1137 } 1138 1139 if (redund->psubcomm) { 1140 ierr = 
PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr); 1141 } 1142 ierr = PetscFree(redund);CHKERRQ(ierr); 1143 } 1144 PetscFunctionReturn(0); 1145 } 1146 1147 #undef __FUNCT__ 1148 #define __FUNCT__ "MatDestroy_MPIAIJ" 1149 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1150 { 1151 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1152 PetscErrorCode ierr; 1153 1154 PetscFunctionBegin; 1155 #if defined(PETSC_USE_LOG) 1156 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1157 #endif 1158 ierr = MatDestroy_Redundant(&aij->redundant);CHKERRQ(ierr); 1159 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1160 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1161 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1162 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1163 #if defined(PETSC_USE_CTABLE) 1164 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1165 #else 1166 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1167 #endif 1168 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1169 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1170 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1171 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1172 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1173 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1174 1175 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1176 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1177 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1178 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 1179 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1180 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1181 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1182 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1183 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1184 PetscFunctionReturn(0); 1185 } 1186 1187 #undef __FUNCT__ 1188 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1189 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1190 { 1191 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1192 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1193 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1194 PetscErrorCode ierr; 1195 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1196 int fd; 1197 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1198 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1199 PetscScalar *column_values; 1200 PetscInt message_count,flowcontrolcount; 1201 FILE *file; 1202 1203 PetscFunctionBegin; 1204 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1205 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1206 nz = A->nz + B->nz; 1207 if (!rank) { 1208 header[0] = MAT_FILE_CLASSID; 1209 header[1] = mat->rmap->N; 1210 header[2] = mat->cmap->N; 1211 1212 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1213 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1214 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1215 /* get largest number of rows any processor has */ 1216 rlen = mat->rmap->n; 1217 range = mat->rmap->range; 1218 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1219 } else { 1220 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1221 rlen = mat->rmap->n; 1222 } 1223 1224 /* load up the local row counts */ 1225 ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr); 1226 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + 
B->i[i+1] - B->i[i]; 1227 1228 /* store the row lengths to the file */ 1229 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1230 if (!rank) { 1231 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1232 for (i=1; i<size; i++) { 1233 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1234 rlen = range[i+1] - range[i]; 1235 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1236 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1237 } 1238 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1239 } else { 1240 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1241 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1242 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1243 } 1244 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1245 1246 /* load up the local column indices */ 1247 nzmax = nz; /* th processor needs space a largest processor needs */ 1248 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1249 ierr = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr); 1250 cnt = 0; 1251 for (i=0; i<mat->rmap->n; i++) { 1252 for (j=B->i[i]; j<B->i[i+1]; j++) { 1253 if ((col = garray[B->j[j]]) > cstart) break; 1254 column_indices[cnt++] = col; 1255 } 1256 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1257 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1258 } 1259 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1260 1261 /* store the column indices to the file */ 1262 ierr = 
PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1263 if (!rank) { 1264 MPI_Status status; 1265 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1266 for (i=1; i<size; i++) { 1267 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1268 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1269 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1270 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1271 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1272 } 1273 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1274 } else { 1275 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1276 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1277 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1278 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1279 } 1280 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1281 1282 /* load up the local column values */ 1283 ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr); 1284 cnt = 0; 1285 for (i=0; i<mat->rmap->n; i++) { 1286 for (j=B->i[i]; j<B->i[i+1]; j++) { 1287 if (garray[B->j[j]] > cstart) break; 1288 column_values[cnt++] = B->a[j]; 1289 } 1290 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1291 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1292 } 1293 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1294 1295 /* store the column values to the file */ 1296 ierr = 
PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1297 if (!rank) { 1298 MPI_Status status; 1299 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1300 for (i=1; i<size; i++) { 1301 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1302 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1303 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1304 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1305 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1306 } 1307 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1308 } else { 1309 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1310 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1311 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1312 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1313 } 1314 ierr = PetscFree(column_values);CHKERRQ(ierr); 1315 1316 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1317 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1318 PetscFunctionReturn(0); 1319 } 1320 1321 #include <petscdraw.h> 1322 #undef __FUNCT__ 1323 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1324 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1325 { 1326 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1327 PetscErrorCode ierr; 1328 PetscMPIInt rank = aij->rank,size = aij->size; 1329 PetscBool isdraw,iascii,isbinary; 1330 PetscViewer sviewer; 1331 PetscViewerFormat format; 1332 1333 PetscFunctionBegin; 1334 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1335 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1336 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1337 if (iascii) { 1338 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1339 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1340 MatInfo info; 1341 PetscBool inodes; 1342 1343 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1344 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1345 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1346 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1347 if (!inodes) { 1348 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1349 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1350 } else { 1351 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1352 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1353 } 1354 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1355 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1356 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1357 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1358 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1359 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1360 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1361 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 
1362 PetscFunctionReturn(0); 1363 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1364 PetscInt inodecount,inodelimit,*inodes; 1365 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1366 if (inodes) { 1367 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1368 } else { 1369 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1370 } 1371 PetscFunctionReturn(0); 1372 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1373 PetscFunctionReturn(0); 1374 } 1375 } else if (isbinary) { 1376 if (size == 1) { 1377 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1378 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1379 } else { 1380 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1381 } 1382 PetscFunctionReturn(0); 1383 } else if (isdraw) { 1384 PetscDraw draw; 1385 PetscBool isnull; 1386 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1387 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1388 } 1389 1390 { 1391 /* assemble the entire matrix onto first processor. 
*/ 1392 Mat A; 1393 Mat_SeqAIJ *Aloc; 1394 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1395 MatScalar *a; 1396 1397 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1398 if (!rank) { 1399 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1400 } else { 1401 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1402 } 1403 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1404 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1405 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1406 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1407 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1408 1409 /* copy over the A part */ 1410 Aloc = (Mat_SeqAIJ*)aij->A->data; 1411 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1412 row = mat->rmap->rstart; 1413 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1414 for (i=0; i<m; i++) { 1415 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1416 row++; 1417 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1418 } 1419 aj = Aloc->j; 1420 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1421 1422 /* copy over the B part */ 1423 Aloc = (Mat_SeqAIJ*)aij->B->data; 1424 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1425 row = mat->rmap->rstart; 1426 ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr); 1427 ct = cols; 1428 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1429 for (i=0; i<m; i++) { 1430 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1431 row++; 1432 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1433 } 1434 ierr = PetscFree(ct);CHKERRQ(ierr); 1435 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1436 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1437 /* 1438 Everyone has to call to draw the matrix since the graphics waits are 1439 synchronized across all processors that share 
the PetscDraw object 1440 */ 1441 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1442 if (!rank) { 1443 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1444 } 1445 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1446 ierr = MatDestroy(&A);CHKERRQ(ierr); 1447 } 1448 PetscFunctionReturn(0); 1449 } 1450 1451 #undef __FUNCT__ 1452 #define __FUNCT__ "MatView_MPIAIJ" 1453 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1454 { 1455 PetscErrorCode ierr; 1456 PetscBool iascii,isdraw,issocket,isbinary; 1457 1458 PetscFunctionBegin; 1459 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1460 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1461 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1462 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1463 if (iascii || isdraw || isbinary || issocket) { 1464 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1465 } 1466 PetscFunctionReturn(0); 1467 } 1468 1469 #undef __FUNCT__ 1470 #define __FUNCT__ "MatSOR_MPIAIJ" 1471 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1472 { 1473 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1474 PetscErrorCode ierr; 1475 Vec bb1 = 0; 1476 PetscBool hasop; 1477 1478 PetscFunctionBegin; 1479 if (flag == SOR_APPLY_UPPER) { 1480 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1481 PetscFunctionReturn(0); 1482 } 1483 1484 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1485 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1486 } 1487 1488 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1489 if (flag & SOR_ZERO_INITIAL_GUESS) { 1490 ierr = 
(*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1491 its--; 1492 } 1493 1494 while (its--) { 1495 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1496 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1497 1498 /* update rhs: bb1 = bb - B*x */ 1499 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1500 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1501 1502 /* local sweep */ 1503 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1504 } 1505 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1506 if (flag & SOR_ZERO_INITIAL_GUESS) { 1507 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1508 its--; 1509 } 1510 while (its--) { 1511 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1512 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1513 1514 /* update rhs: bb1 = bb - B*x */ 1515 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1516 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1517 1518 /* local sweep */ 1519 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1520 } 1521 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1522 if (flag & SOR_ZERO_INITIAL_GUESS) { 1523 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1524 its--; 1525 } 1526 while (its--) { 1527 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1528 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1529 1530 /* update rhs: bb1 = bb - B*x */ 1531 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1532 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1533 1534 /* local sweep */ 1535 ierr = 
(*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1536 } 1537 } else if (flag & SOR_EISENSTAT) { 1538 Vec xx1; 1539 1540 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1541 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1542 1543 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1544 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1545 if (!mat->diag) { 1546 ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1547 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1548 } 1549 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1550 if (hasop) { 1551 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1552 } else { 1553 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1554 } 1555 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1556 1557 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1558 1559 /* local sweep */ 1560 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1561 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1562 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1563 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1564 1565 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1566 PetscFunctionReturn(0); 1567 } 1568 1569 #undef __FUNCT__ 1570 #define __FUNCT__ "MatPermute_MPIAIJ" 1571 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1572 { 1573 Mat aA,aB,Aperm; 1574 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1575 PetscScalar *aa,*ba; 1576 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1577 PetscSF rowsf,sf; 1578 IS parcolp = NULL; 1579 PetscBool done; 1580 PetscErrorCode ierr; 1581 1582 PetscFunctionBegin; 1583 ierr = 
MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1584 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1585 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1586 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1587 1588 /* Invert row permutation to find out where my rows should go */ 1589 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1590 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1591 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1592 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1593 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1594 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1595 1596 /* Invert column permutation to find out where my columns should go */ 1597 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1598 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1599 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1600 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1601 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1602 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1603 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1604 1605 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1606 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1607 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1608 1609 /* Find out where my gcols should go */ 1610 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1611 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1612 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1613 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1614 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1615 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1616 ierr = 
PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1617 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1618 1619 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1620 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1621 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1622 for (i=0; i<m; i++) { 1623 PetscInt row = rdest[i],rowner; 1624 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1625 for (j=ai[i]; j<ai[i+1]; j++) { 1626 PetscInt cowner,col = cdest[aj[j]]; 1627 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1628 if (rowner == cowner) dnnz[i]++; 1629 else onnz[i]++; 1630 } 1631 for (j=bi[i]; j<bi[i+1]; j++) { 1632 PetscInt cowner,col = gcdest[bj[j]]; 1633 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1634 if (rowner == cowner) dnnz[i]++; 1635 else onnz[i]++; 1636 } 1637 } 1638 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1639 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1640 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1641 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1642 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1643 1644 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1645 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1646 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1647 for (i=0; i<m; i++) { 1648 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1649 PetscInt j0,rowlen; 1650 rowlen = ai[i+1] - ai[i]; 1651 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1652 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1653 ierr = 
MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1654 } 1655 rowlen = bi[i+1] - bi[i]; 1656 for (j0=j=0; j<rowlen; j0=j) { 1657 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1658 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1659 } 1660 } 1661 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1662 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1663 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1664 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1665 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1666 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1667 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1668 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1669 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1670 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1671 *B = Aperm; 1672 PetscFunctionReturn(0); 1673 } 1674 1675 #undef __FUNCT__ 1676 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1677 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1678 { 1679 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1680 Mat A = mat->A,B = mat->B; 1681 PetscErrorCode ierr; 1682 PetscReal isend[5],irecv[5]; 1683 1684 PetscFunctionBegin; 1685 info->block_size = 1.0; 1686 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1687 1688 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1689 isend[3] = info->memory; isend[4] = info->mallocs; 1690 1691 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1692 1693 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1694 isend[3] += info->memory; isend[4] += info->mallocs; 1695 if (flag == MAT_LOCAL) { 1696 info->nz_used = isend[0]; 1697 info->nz_allocated = isend[1]; 1698 info->nz_unneeded = isend[2]; 1699 info->memory = 
isend[3]; 1700 info->mallocs = isend[4]; 1701 } else if (flag == MAT_GLOBAL_MAX) { 1702 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1703 1704 info->nz_used = irecv[0]; 1705 info->nz_allocated = irecv[1]; 1706 info->nz_unneeded = irecv[2]; 1707 info->memory = irecv[3]; 1708 info->mallocs = irecv[4]; 1709 } else if (flag == MAT_GLOBAL_SUM) { 1710 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1711 1712 info->nz_used = irecv[0]; 1713 info->nz_allocated = irecv[1]; 1714 info->nz_unneeded = irecv[2]; 1715 info->memory = irecv[3]; 1716 info->mallocs = irecv[4]; 1717 } 1718 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1719 info->fill_ratio_needed = 0; 1720 info->factor_mallocs = 0; 1721 PetscFunctionReturn(0); 1722 } 1723 1724 #undef __FUNCT__ 1725 #define __FUNCT__ "MatSetOption_MPIAIJ" 1726 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1727 { 1728 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1729 PetscErrorCode ierr; 1730 1731 PetscFunctionBegin; 1732 switch (op) { 1733 case MAT_NEW_NONZERO_LOCATIONS: 1734 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1735 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1736 case MAT_KEEP_NONZERO_PATTERN: 1737 case MAT_NEW_NONZERO_LOCATION_ERR: 1738 case MAT_USE_INODES: 1739 case MAT_IGNORE_ZERO_ENTRIES: 1740 MatCheckPreallocated(A,1); 1741 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1742 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1743 break; 1744 case MAT_ROW_ORIENTED: 1745 a->roworiented = flg; 1746 1747 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1748 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1749 break; 1750 case MAT_NEW_DIAGONALS: 1751 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1752 break; 1753 case MAT_IGNORE_OFF_PROC_ENTRIES: 1754 a->donotstash = flg; 1755 break; 1756 case MAT_SPD: 1757 A->spd_set = PETSC_TRUE; 1758 A->spd = flg; 1759 if (flg) { 
1760 A->symmetric = PETSC_TRUE; 1761 A->structurally_symmetric = PETSC_TRUE; 1762 A->symmetric_set = PETSC_TRUE; 1763 A->structurally_symmetric_set = PETSC_TRUE; 1764 } 1765 break; 1766 case MAT_SYMMETRIC: 1767 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1768 break; 1769 case MAT_STRUCTURALLY_SYMMETRIC: 1770 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1771 break; 1772 case MAT_HERMITIAN: 1773 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1774 break; 1775 case MAT_SYMMETRY_ETERNAL: 1776 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1777 break; 1778 default: 1779 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1780 } 1781 PetscFunctionReturn(0); 1782 } 1783 1784 #undef __FUNCT__ 1785 #define __FUNCT__ "MatGetRow_MPIAIJ" 1786 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1787 { 1788 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1789 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1790 PetscErrorCode ierr; 1791 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1792 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1793 PetscInt *cmap,*idx_p; 1794 1795 PetscFunctionBegin; 1796 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1797 mat->getrowactive = PETSC_TRUE; 1798 1799 if (!mat->rowvalues && (idx || v)) { 1800 /* 1801 allocate enough space to hold information from the longest row. 
1802 */ 1803 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1804 PetscInt max = 1,tmp; 1805 for (i=0; i<matin->rmap->n; i++) { 1806 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1807 if (max < tmp) max = tmp; 1808 } 1809 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1810 } 1811 1812 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1813 lrow = row - rstart; 1814 1815 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1816 if (!v) {pvA = 0; pvB = 0;} 1817 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1818 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1819 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1820 nztot = nzA + nzB; 1821 1822 cmap = mat->garray; 1823 if (v || idx) { 1824 if (nztot) { 1825 /* Sort by increasing column numbers, assuming A and B already sorted */ 1826 PetscInt imark = -1; 1827 if (v) { 1828 *v = v_p = mat->rowvalues; 1829 for (i=0; i<nzB; i++) { 1830 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1831 else break; 1832 } 1833 imark = i; 1834 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1835 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1836 } 1837 if (idx) { 1838 *idx = idx_p = mat->rowindices; 1839 if (imark > -1) { 1840 for (i=0; i<imark; i++) { 1841 idx_p[i] = cmap[cworkB[i]]; 1842 } 1843 } else { 1844 for (i=0; i<nzB; i++) { 1845 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1846 else break; 1847 } 1848 imark = i; 1849 } 1850 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1851 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1852 } 1853 } else { 1854 if (idx) *idx = 0; 1855 if (v) *v = 0; 1856 } 1857 } 1858 *nz = nztot; 1859 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1860 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1861 PetscFunctionReturn(0); 1862 } 1863 1864 #undef 
__FUNCT__ 1865 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1866 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1867 { 1868 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1869 1870 PetscFunctionBegin; 1871 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1872 aij->getrowactive = PETSC_FALSE; 1873 PetscFunctionReturn(0); 1874 } 1875 1876 #undef __FUNCT__ 1877 #define __FUNCT__ "MatNorm_MPIAIJ" 1878 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1879 { 1880 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1881 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1882 PetscErrorCode ierr; 1883 PetscInt i,j,cstart = mat->cmap->rstart; 1884 PetscReal sum = 0.0; 1885 MatScalar *v; 1886 1887 PetscFunctionBegin; 1888 if (aij->size == 1) { 1889 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1890 } else { 1891 if (type == NORM_FROBENIUS) { 1892 v = amat->a; 1893 for (i=0; i<amat->nz; i++) { 1894 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1895 } 1896 v = bmat->a; 1897 for (i=0; i<bmat->nz; i++) { 1898 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1899 } 1900 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1901 *norm = PetscSqrtReal(*norm); 1902 } else if (type == NORM_1) { /* max column norm */ 1903 PetscReal *tmp,*tmp2; 1904 PetscInt *jj,*garray = aij->garray; 1905 ierr = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr); 1906 ierr = PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr); 1907 *norm = 0.0; 1908 v = amat->a; jj = amat->j; 1909 for (j=0; j<amat->nz; j++) { 1910 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1911 } 1912 v = bmat->a; jj = bmat->j; 1913 for (j=0; j<bmat->nz; j++) { 1914 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1915 } 1916 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1917 
for (j=0; j<mat->cmap->N; j++) { 1918 if (tmp2[j] > *norm) *norm = tmp2[j]; 1919 } 1920 ierr = PetscFree(tmp);CHKERRQ(ierr); 1921 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1922 } else if (type == NORM_INFINITY) { /* max row norm */ 1923 PetscReal ntemp = 0.0; 1924 for (j=0; j<aij->A->rmap->n; j++) { 1925 v = amat->a + amat->i[j]; 1926 sum = 0.0; 1927 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1928 sum += PetscAbsScalar(*v); v++; 1929 } 1930 v = bmat->a + bmat->i[j]; 1931 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1932 sum += PetscAbsScalar(*v); v++; 1933 } 1934 if (sum > ntemp) ntemp = sum; 1935 } 1936 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1937 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1938 } 1939 PetscFunctionReturn(0); 1940 } 1941 1942 #undef __FUNCT__ 1943 #define __FUNCT__ "MatTranspose_MPIAIJ" 1944 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1945 { 1946 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1947 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1948 PetscErrorCode ierr; 1949 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1950 PetscInt cstart = A->cmap->rstart,ncol; 1951 Mat B; 1952 MatScalar *array; 1953 1954 PetscFunctionBegin; 1955 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1956 1957 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1958 ai = Aloc->i; aj = Aloc->j; 1959 bi = Bloc->i; bj = Bloc->j; 1960 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1961 PetscInt *d_nnz,*g_nnz,*o_nnz; 1962 PetscSFNode *oloc; 1963 PETSC_UNUSED PetscSF sf; 1964 1965 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1966 /* compute d_nnz for preallocation */ 1967 ierr = 
PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1968 for (i=0; i<ai[ma]; i++) { 1969 d_nnz[aj[i]]++; 1970 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1971 } 1972 /* compute local off-diagonal contributions */ 1973 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1974 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1975 /* map those to global */ 1976 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1977 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1978 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1979 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1980 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1981 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1982 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1983 1984 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1985 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1986 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1987 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1988 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1989 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 1990 } else { 1991 B = *matout; 1992 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1993 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1994 } 1995 1996 /* copy over the A part */ 1997 array = Aloc->a; 1998 row = A->rmap->rstart; 1999 for (i=0; i<ma; i++) { 2000 ncol = ai[i+1]-ai[i]; 2001 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2002 row++; 2003 array += ncol; aj += ncol; 2004 } 2005 aj = Aloc->j; 2006 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2007 2008 /* copy over the B part */ 2009 ierr = 
PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 2010 array = Bloc->a; 2011 row = A->rmap->rstart; 2012 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2013 cols_tmp = cols; 2014 for (i=0; i<mb; i++) { 2015 ncol = bi[i+1]-bi[i]; 2016 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2017 row++; 2018 array += ncol; cols_tmp += ncol; 2019 } 2020 ierr = PetscFree(cols);CHKERRQ(ierr); 2021 2022 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2023 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2024 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2025 *matout = B; 2026 } else { 2027 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2028 } 2029 PetscFunctionReturn(0); 2030 } 2031 2032 #undef __FUNCT__ 2033 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2034 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2035 { 2036 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2037 Mat a = aij->A,b = aij->B; 2038 PetscErrorCode ierr; 2039 PetscInt s1,s2,s3; 2040 2041 PetscFunctionBegin; 2042 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2043 if (rr) { 2044 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2045 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2046 /* Overlap communication with computation. 
*/ 2047 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2048 } 2049 if (ll) { 2050 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2051 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2052 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2053 } 2054 /* scale the diagonal block */ 2055 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2056 2057 if (rr) { 2058 /* Do a scatter end and then right scale the off-diagonal block */ 2059 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2060 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2061 } 2062 PetscFunctionReturn(0); 2063 } 2064 2065 #undef __FUNCT__ 2066 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2067 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2068 { 2069 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2070 PetscErrorCode ierr; 2071 2072 PetscFunctionBegin; 2073 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2074 PetscFunctionReturn(0); 2075 } 2076 2077 #undef __FUNCT__ 2078 #define __FUNCT__ "MatEqual_MPIAIJ" 2079 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2080 { 2081 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2082 Mat a,b,c,d; 2083 PetscBool flg; 2084 PetscErrorCode ierr; 2085 2086 PetscFunctionBegin; 2087 a = matA->A; b = matA->B; 2088 c = matB->A; d = matB->B; 2089 2090 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2091 if (flg) { 2092 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2093 } 2094 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2095 PetscFunctionReturn(0); 2096 } 2097 2098 #undef __FUNCT__ 2099 #define __FUNCT__ "MatCopy_MPIAIJ" 2100 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2101 { 2102 PetscErrorCode ierr; 2103 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2104 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2105 2106 PetscFunctionBegin; 2107 /* If the two matrices 
don't have the same copy implementation, they aren't compatible for fast copy. */ 2108 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2109 /* because of the column compression in the off-processor part of the matrix a->B, 2110 the number of columns in a->B and b->B may be different, hence we cannot call 2111 the MatCopy() directly on the two parts. If need be, we can provide a more 2112 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2113 then copying the submatrices */ 2114 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2115 } else { 2116 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2117 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2118 } 2119 PetscFunctionReturn(0); 2120 } 2121 2122 #undef __FUNCT__ 2123 #define __FUNCT__ "MatSetUp_MPIAIJ" 2124 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2125 { 2126 PetscErrorCode ierr; 2127 2128 PetscFunctionBegin; 2129 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2130 PetscFunctionReturn(0); 2131 } 2132 2133 /* 2134 Computes the number of nonzeros per row needed for preallocation when X and Y 2135 have different nonzero structure. 
2136 */ 2137 #undef __FUNCT__ 2138 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private" 2139 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2140 { 2141 PetscInt i,j,k,nzx,nzy; 2142 2143 PetscFunctionBegin; 2144 /* Set the number of nonzeros in the new matrix */ 2145 for (i=0; i<m; i++) { 2146 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2147 nzx = xi[i+1] - xi[i]; 2148 nzy = yi[i+1] - yi[i]; 2149 nnz[i] = 0; 2150 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2151 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2152 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2153 nnz[i]++; 2154 } 2155 for (; k<nzy; k++) nnz[i]++; 2156 } 2157 PetscFunctionReturn(0); 2158 } 2159 2160 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2161 #undef __FUNCT__ 2162 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2163 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2164 { 2165 PetscErrorCode ierr; 2166 PetscInt m = Y->rmap->N; 2167 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2168 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2169 2170 PetscFunctionBegin; 2171 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2172 PetscFunctionReturn(0); 2173 } 2174 2175 #undef __FUNCT__ 2176 #define __FUNCT__ "MatAXPY_MPIAIJ" 2177 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2178 { 2179 PetscErrorCode ierr; 2180 PetscInt i; 2181 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2182 PetscBLASInt bnz,one=1; 2183 Mat_SeqAIJ *x,*y; 2184 2185 PetscFunctionBegin; 2186 if (str == SAME_NONZERO_PATTERN) { 2187 PetscScalar alpha = a; 2188 x = (Mat_SeqAIJ*)xx->A->data; 2189 ierr = 
PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2190 y = (Mat_SeqAIJ*)yy->A->data; 2191 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2192 x = (Mat_SeqAIJ*)xx->B->data; 2193 y = (Mat_SeqAIJ*)yy->B->data; 2194 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2195 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2196 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2197 } else if (str == SUBSET_NONZERO_PATTERN) { 2198 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2199 2200 x = (Mat_SeqAIJ*)xx->B->data; 2201 y = (Mat_SeqAIJ*)yy->B->data; 2202 if (y->xtoy && y->XtoY != xx->B) { 2203 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2204 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2205 } 2206 if (!y->xtoy) { /* get xtoy */ 2207 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2208 y->XtoY = xx->B; 2209 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2210 } 2211 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2212 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2213 } else { 2214 Mat B; 2215 PetscInt *nnz_d,*nnz_o; 2216 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2217 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2218 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2219 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2220 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2221 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2222 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2223 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2224 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2225 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2226 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2227 
ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2228 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2229 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2230 } 2231 PetscFunctionReturn(0); 2232 } 2233 2234 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2235 2236 #undef __FUNCT__ 2237 #define __FUNCT__ "MatConjugate_MPIAIJ" 2238 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2239 { 2240 #if defined(PETSC_USE_COMPLEX) 2241 PetscErrorCode ierr; 2242 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2243 2244 PetscFunctionBegin; 2245 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2246 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2247 #else 2248 PetscFunctionBegin; 2249 #endif 2250 PetscFunctionReturn(0); 2251 } 2252 2253 #undef __FUNCT__ 2254 #define __FUNCT__ "MatRealPart_MPIAIJ" 2255 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2256 { 2257 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2258 PetscErrorCode ierr; 2259 2260 PetscFunctionBegin; 2261 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2262 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2263 PetscFunctionReturn(0); 2264 } 2265 2266 #undef __FUNCT__ 2267 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2268 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2269 { 2270 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2271 PetscErrorCode ierr; 2272 2273 PetscFunctionBegin; 2274 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2275 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2276 PetscFunctionReturn(0); 2277 } 2278 2279 #if defined(PETSC_HAVE_PBGL) 2280 2281 #include <boost/parallel/mpi/bsp_process_group.hpp> 2282 #include <boost/graph/distributed/ilu_default_graph.hpp> 2283 #include <boost/graph/distributed/ilu_0_block.hpp> 2284 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2285 #include <boost/graph/distributed/petsc/interface.hpp> 2286 #include <boost/multi_array.hpp> 2287 #include <boost/parallel/distributed_property_map->hpp> 2288 2289 #undef __FUNCT__ 2290 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2291 /* 2292 This uses the parallel ILU factorization of Peter 
Gottschling <pgottsch@osl.iu.edu> 2293 */ 2294 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2295 { 2296 namespace petsc = boost::distributed::petsc; 2297 2298 namespace graph_dist = boost::graph::distributed; 2299 using boost::graph::distributed::ilu_default::process_group_type; 2300 using boost::graph::ilu_permuted; 2301 2302 PetscBool row_identity, col_identity; 2303 PetscContainer c; 2304 PetscInt m, n, M, N; 2305 PetscErrorCode ierr; 2306 2307 PetscFunctionBegin; 2308 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2309 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2310 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2311 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2312 2313 process_group_type pg; 2314 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2315 lgraph_type *lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2316 lgraph_type& level_graph = *lgraph_p; 2317 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2318 2319 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2320 ilu_permuted(level_graph); 2321 2322 /* put together the new matrix */ 2323 ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr); 2324 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2325 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2326 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2327 ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr); 2328 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2329 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2330 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2331 2332 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c); 2333 ierr = 
PetscContainerSetPointer(c, lgraph_p); 2334 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2335 ierr = PetscContainerDestroy(&c); 2336 PetscFunctionReturn(0); 2337 } 2338 2339 #undef __FUNCT__ 2340 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2341 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2342 { 2343 PetscFunctionBegin; 2344 PetscFunctionReturn(0); 2345 } 2346 2347 #undef __FUNCT__ 2348 #define __FUNCT__ "MatSolve_MPIAIJ" 2349 /* 2350 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2351 */ 2352 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2353 { 2354 namespace graph_dist = boost::graph::distributed; 2355 2356 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2357 lgraph_type *lgraph_p; 2358 PetscContainer c; 2359 PetscErrorCode ierr; 2360 2361 PetscFunctionBegin; 2362 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr); 2363 ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr); 2364 ierr = VecCopy(b, x);CHKERRQ(ierr); 2365 2366 PetscScalar *array_x; 2367 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2368 PetscInt sx; 2369 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2370 2371 PetscScalar *array_b; 2372 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2373 PetscInt sb; 2374 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2375 2376 lgraph_type& level_graph = *lgraph_p; 2377 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2378 2379 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2380 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]); 2381 array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]); 2382 2383 typedef boost::iterator_property_map<array_ref_type::iterator, 2384 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2385 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)); 2386 
gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2387 2388 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2389 PetscFunctionReturn(0); 2390 } 2391 #endif 2392 2393 2394 #undef __FUNCT__ 2395 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced" 2396 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2397 { 2398 PetscMPIInt rank,size; 2399 MPI_Comm comm; 2400 PetscErrorCode ierr; 2401 PetscInt nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N; 2402 PetscMPIInt *send_rank= NULL,*recv_rank=NULL,subrank,subsize; 2403 PetscInt *rowrange = mat->rmap->range; 2404 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2405 Mat A = aij->A,B=aij->B,C=*matredundant; 2406 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2407 PetscScalar *sbuf_a; 2408 PetscInt nzlocal=a->nz+b->nz; 2409 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2410 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray; 2411 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2412 MatScalar *aworkA,*aworkB; 2413 PetscScalar *vals; 2414 PetscMPIInt tag1,tag2,tag3,imdex; 2415 MPI_Request *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL; 2416 MPI_Request *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL; 2417 MPI_Status recv_status,*send_status; 2418 PetscInt *sbuf_nz=NULL,*rbuf_nz=NULL,count; 2419 PetscInt **rbuf_j=NULL; 2420 PetscScalar **rbuf_a=NULL; 2421 Mat_Redundant *redund =NULL; 2422 2423 PetscFunctionBegin; 2424 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2425 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2426 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2427 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2428 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2429 2430 if (reuse == MAT_REUSE_MATRIX) { 2431 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot 
reuse matrix. Wrong global size"); 2432 if (subsize == 1) { 2433 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2434 redund = c->redundant; 2435 } else { 2436 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2437 redund = c->redundant; 2438 } 2439 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2440 2441 nsends = redund->nsends; 2442 nrecvs = redund->nrecvs; 2443 send_rank = redund->send_rank; 2444 recv_rank = redund->recv_rank; 2445 sbuf_nz = redund->sbuf_nz; 2446 rbuf_nz = redund->rbuf_nz; 2447 sbuf_j = redund->sbuf_j; 2448 sbuf_a = redund->sbuf_a; 2449 rbuf_j = redund->rbuf_j; 2450 rbuf_a = redund->rbuf_a; 2451 } 2452 2453 if (reuse == MAT_INITIAL_MATRIX) { 2454 PetscInt nleftover,np_subcomm; 2455 2456 /* get the destination processors' id send_rank, nsends and nrecvs */ 2457 ierr = PetscMalloc2(size,&send_rank,size,&recv_rank);CHKERRQ(ierr); 2458 2459 np_subcomm = size/nsubcomm; 2460 nleftover = size - nsubcomm*np_subcomm; 2461 2462 /* block of codes below is specific for INTERLACED */ 2463 /* ------------------------------------------------*/ 2464 nsends = 0; nrecvs = 0; 2465 for (i=0; i<size; i++) { 2466 if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */ 2467 send_rank[nsends++] = i; 2468 recv_rank[nrecvs++] = i; 2469 } 2470 } 2471 if (rank >= size - nleftover) { /* this proc is a leftover processor */ 2472 i = size-nleftover-1; 2473 j = 0; 2474 while (j < nsubcomm - nleftover) { 2475 send_rank[nsends++] = i; 2476 i--; j++; 2477 } 2478 } 2479 2480 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 2481 for (i=0; i<nleftover; i++) { 2482 recv_rank[nrecvs++] = size-nleftover+i; 2483 } 2484 } 2485 /*----------------------------------------------*/ 2486 2487 /* allocate sbuf_j, sbuf_a */ 2488 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2489 ierr = PetscMalloc1(i,&sbuf_j);CHKERRQ(ierr); 2490 ierr = 
PetscMalloc1((nzlocal+1),&sbuf_a);CHKERRQ(ierr); 2491 /* 2492 ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr); 2493 ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr); 2494 */ 2495 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2496 2497 /* copy mat's local entries into the buffers */ 2498 if (reuse == MAT_INITIAL_MATRIX) { 2499 rownz_max = 0; 2500 rptr = sbuf_j; 2501 cols = sbuf_j + rend-rstart + 1; 2502 vals = sbuf_a; 2503 rptr[0] = 0; 2504 for (i=0; i<rend-rstart; i++) { 2505 row = i + rstart; 2506 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2507 ncols = nzA + nzB; 2508 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2509 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2510 /* load the column indices for this row into cols */ 2511 lwrite = 0; 2512 for (l=0; l<nzB; l++) { 2513 if ((ctmp = bmap[cworkB[l]]) < cstart) { 2514 vals[lwrite] = aworkB[l]; 2515 cols[lwrite++] = ctmp; 2516 } 2517 } 2518 for (l=0; l<nzA; l++) { 2519 vals[lwrite] = aworkA[l]; 2520 cols[lwrite++] = cstart + cworkA[l]; 2521 } 2522 for (l=0; l<nzB; l++) { 2523 if ((ctmp = bmap[cworkB[l]]) >= cend) { 2524 vals[lwrite] = aworkB[l]; 2525 cols[lwrite++] = ctmp; 2526 } 2527 } 2528 vals += ncols; 2529 cols += ncols; 2530 rptr[i+1] = rptr[i] + ncols; 2531 if (rownz_max < ncols) rownz_max = ncols; 2532 } 2533 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2534 } else { /* only copy matrix values into sbuf_a */ 2535 rptr = sbuf_j; 2536 vals = sbuf_a; 2537 rptr[0] = 0; 2538 for (i=0; i<rend-rstart; i++) { 2539 row = i + rstart; 2540 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2541 ncols = nzA + nzB; 2542 cworkB = b->j + b->i[i]; 2543 aworkA = a->a + a->i[i]; 2544 aworkB = b->a + b->i[i]; 2545 lwrite = 0; 2546 for (l=0; l<nzB; l++) { 2547 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2548 } 2549 
for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2550 for (l=0; l<nzB; l++) { 2551 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2552 } 2553 vals += ncols; 2554 rptr[i+1] = rptr[i] + ncols; 2555 } 2556 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2557 2558 /* send nzlocal to others, and recv other's nzlocal */ 2559 /*--------------------------------------------------*/ 2560 if (reuse == MAT_INITIAL_MATRIX) { 2561 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr); 2562 2563 s_waits2 = s_waits3 + nsends; 2564 s_waits1 = s_waits2 + nsends; 2565 r_waits1 = s_waits1 + nsends; 2566 r_waits2 = r_waits1 + nrecvs; 2567 r_waits3 = r_waits2 + nrecvs; 2568 } else { 2569 ierr = PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr); 2570 2571 r_waits3 = s_waits3 + nsends; 2572 } 2573 2574 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2575 if (reuse == MAT_INITIAL_MATRIX) { 2576 /* get new tags to keep the communication clean */ 2577 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2578 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2579 ierr = PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);CHKERRQ(ierr); 2580 2581 /* post receives of other's nzlocal */ 2582 for (i=0; i<nrecvs; i++) { 2583 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2584 } 2585 /* send nzlocal to others */ 2586 for (i=0; i<nsends; i++) { 2587 sbuf_nz[i] = nzlocal; 2588 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2589 } 2590 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2591 count = nrecvs; 2592 while (count) { 2593 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2594 2595 recv_rank[imdex] = recv_status.MPI_SOURCE; 2596 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2597 ierr = 
PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);CHKERRQ(ierr); 2598 2599 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2600 2601 rbuf_nz[imdex] += i + 2; 2602 2603 ierr = PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);CHKERRQ(ierr); 2604 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2605 count--; 2606 } 2607 /* wait on sends of nzlocal */ 2608 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2609 /* send mat->i,j to others, and recv from other's */ 2610 /*------------------------------------------------*/ 2611 for (i=0; i<nsends; i++) { 2612 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2613 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2614 } 2615 /* wait on receives of mat->i,j */ 2616 /*------------------------------*/ 2617 count = nrecvs; 2618 while (count) { 2619 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2620 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2621 count--; 2622 } 2623 /* wait on sends of mat->i,j */ 2624 /*---------------------------*/ 2625 if (nsends) { 2626 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2627 } 2628 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2629 2630 /* post receives, send and receive mat->a */ 2631 /*----------------------------------------*/ 2632 for (imdex=0; imdex<nrecvs; imdex++) { 2633 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2634 } 2635 for (i=0; i<nsends; i++) { 2636 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2637 } 2638 count = nrecvs; 2639 while (count) { 2640 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2641 if 
(recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2642 count--; 2643 } 2644 if (nsends) { 2645 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2646 } 2647 2648 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2649 2650 /* create redundant matrix */ 2651 /*-------------------------*/ 2652 if (reuse == MAT_INITIAL_MATRIX) { 2653 const PetscInt *range; 2654 PetscInt rstart_sub,rend_sub,mloc_sub; 2655 2656 /* compute rownz_max for preallocation */ 2657 for (imdex=0; imdex<nrecvs; imdex++) { 2658 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2659 rptr = rbuf_j[imdex]; 2660 for (i=0; i<j; i++) { 2661 ncols = rptr[i+1] - rptr[i]; 2662 if (rownz_max < ncols) rownz_max = ncols; 2663 } 2664 } 2665 2666 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2667 2668 /* get local size of redundant matrix 2669 - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED, works for other types, but may not efficient! 
*/ 2670 ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr); 2671 rstart_sub = range[nsubcomm*subrank]; 2672 if (subrank+1 < subsize) { /* not the last proc in subcomm */ 2673 rend_sub = range[nsubcomm*(subrank+1)]; 2674 } else { 2675 rend_sub = mat->rmap->N; 2676 } 2677 mloc_sub = rend_sub - rstart_sub; 2678 2679 if (M == N) { 2680 ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2681 } else { /* non-square matrix */ 2682 ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr); 2683 } 2684 ierr = MatSetBlockSizesFromMats(C,mat,mat);CHKERRQ(ierr); 2685 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2686 ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr); 2687 ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr); 2688 } else { 2689 C = *matredundant; 2690 } 2691 2692 /* insert local matrix entries */ 2693 rptr = sbuf_j; 2694 cols = sbuf_j + rend-rstart + 1; 2695 vals = sbuf_a; 2696 for (i=0; i<rend-rstart; i++) { 2697 row = i + rstart; 2698 ncols = rptr[i+1] - rptr[i]; 2699 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2700 vals += ncols; 2701 cols += ncols; 2702 } 2703 /* insert received matrix entries */ 2704 for (imdex=0; imdex<nrecvs; imdex++) { 2705 rstart = rowrange[recv_rank[imdex]]; 2706 rend = rowrange[recv_rank[imdex]+1]; 2707 /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */ 2708 rptr = rbuf_j[imdex]; 2709 cols = rbuf_j[imdex] + rend-rstart + 1; 2710 vals = rbuf_a[imdex]; 2711 for (i=0; i<rend-rstart; i++) { 2712 row = i + rstart; 2713 ncols = rptr[i+1] - rptr[i]; 2714 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2715 vals += ncols; 2716 cols += ncols; 2717 } 2718 } 2719 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2720 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2721 2722 if (reuse == MAT_INITIAL_MATRIX) { 2723 *matredundant = C; 2724 2725 /* create a 
supporting struct and attach it to C for reuse */ 2726 ierr = PetscNewLog(C,&redund);CHKERRQ(ierr); 2727 if (subsize == 1) { 2728 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2729 c->redundant = redund; 2730 } else { 2731 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2732 c->redundant = redund; 2733 } 2734 2735 redund->nzlocal = nzlocal; 2736 redund->nsends = nsends; 2737 redund->nrecvs = nrecvs; 2738 redund->send_rank = send_rank; 2739 redund->recv_rank = recv_rank; 2740 redund->sbuf_nz = sbuf_nz; 2741 redund->rbuf_nz = rbuf_nz; 2742 redund->sbuf_j = sbuf_j; 2743 redund->sbuf_a = sbuf_a; 2744 redund->rbuf_j = rbuf_j; 2745 redund->rbuf_a = rbuf_a; 2746 redund->psubcomm = NULL; 2747 } 2748 PetscFunctionReturn(0); 2749 } 2750 2751 #undef __FUNCT__ 2752 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2753 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2754 { 2755 PetscErrorCode ierr; 2756 MPI_Comm comm; 2757 PetscMPIInt size,subsize; 2758 PetscInt mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N; 2759 Mat_Redundant *redund=NULL; 2760 PetscSubcomm psubcomm=NULL; 2761 MPI_Comm subcomm_in=subcomm; 2762 Mat *matseq; 2763 IS isrow,iscol; 2764 2765 PetscFunctionBegin; 2766 if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */ 2767 if (reuse == MAT_INITIAL_MATRIX) { 2768 /* create psubcomm, then get subcomm */ 2769 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2770 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2771 if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must between 1 and %D",size); 2772 2773 ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr); 2774 ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr); 2775 ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr); 2776 ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr); 2777 subcomm = psubcomm->comm; 2778 } else { /* retrieve psubcomm and 
subcomm */ 2779 ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr); 2780 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2781 if (subsize == 1) { 2782 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2783 redund = c->redundant; 2784 } else { 2785 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2786 redund = c->redundant; 2787 } 2788 psubcomm = redund->psubcomm; 2789 } 2790 if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) { 2791 ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr); 2792 if (reuse == MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_Redundant() */ 2793 ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr); 2794 if (subsize == 1) { 2795 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2796 c->redundant->psubcomm = psubcomm; 2797 } else { 2798 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2799 c->redundant->psubcomm = psubcomm ; 2800 } 2801 } 2802 PetscFunctionReturn(0); 2803 } 2804 } 2805 2806 /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */ 2807 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2808 if (reuse == MAT_INITIAL_MATRIX) { 2809 /* create a local sequential matrix matseq[0] */ 2810 mloc_sub = PETSC_DECIDE; 2811 ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr); 2812 ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr); 2813 rstart = rend - mloc_sub; 2814 ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr); 2815 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr); 2816 } else { /* reuse == MAT_REUSE_MATRIX */ 2817 if (subsize == 1) { 2818 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2819 redund = c->redundant; 2820 } else { 2821 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2822 redund = c->redundant; 2823 } 2824 2825 isrow = redund->isrow; 2826 iscol = 
redund->iscol; 2827 matseq = redund->matseq; 2828 } 2829 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr); 2830 ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr); 2831 2832 if (reuse == MAT_INITIAL_MATRIX) { 2833 /* create a supporting struct and attach it to C for reuse */ 2834 ierr = PetscNewLog(*matredundant,&redund);CHKERRQ(ierr); 2835 if (subsize == 1) { 2836 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2837 c->redundant = redund; 2838 } else { 2839 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2840 c->redundant = redund; 2841 } 2842 redund->isrow = isrow; 2843 redund->iscol = iscol; 2844 redund->matseq = matseq; 2845 redund->psubcomm = psubcomm; 2846 } 2847 PetscFunctionReturn(0); 2848 } 2849 2850 #undef __FUNCT__ 2851 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2852 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2853 { 2854 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2855 PetscErrorCode ierr; 2856 PetscInt i,*idxb = 0; 2857 PetscScalar *va,*vb; 2858 Vec vtmp; 2859 2860 PetscFunctionBegin; 2861 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2862 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2863 if (idx) { 2864 for (i=0; i<A->rmap->n; i++) { 2865 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2866 } 2867 } 2868 2869 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2870 if (idx) { 2871 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2872 } 2873 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2874 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2875 2876 for (i=0; i<A->rmap->n; i++) { 2877 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2878 va[i] = vb[i]; 2879 if (idx) idx[i] = a->garray[idxb[i]]; 2880 } 2881 } 2882 2883 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2884 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2885 ierr = PetscFree(idxb);CHKERRQ(ierr); 2886 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2887 
PetscFunctionReturn(0); 2888 } 2889 2890 #undef __FUNCT__ 2891 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2892 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2893 { 2894 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2895 PetscErrorCode ierr; 2896 PetscInt i,*idxb = 0; 2897 PetscScalar *va,*vb; 2898 Vec vtmp; 2899 2900 PetscFunctionBegin; 2901 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2902 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2903 if (idx) { 2904 for (i=0; i<A->cmap->n; i++) { 2905 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2906 } 2907 } 2908 2909 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2910 if (idx) { 2911 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2912 } 2913 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2914 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2915 2916 for (i=0; i<A->rmap->n; i++) { 2917 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2918 va[i] = vb[i]; 2919 if (idx) idx[i] = a->garray[idxb[i]]; 2920 } 2921 } 2922 2923 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2924 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2925 ierr = PetscFree(idxb);CHKERRQ(ierr); 2926 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2927 PetscFunctionReturn(0); 2928 } 2929 2930 #undef __FUNCT__ 2931 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2932 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2933 { 2934 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2935 PetscInt n = A->rmap->n; 2936 PetscInt cstart = A->cmap->rstart; 2937 PetscInt *cmap = mat->garray; 2938 PetscInt *diagIdx, *offdiagIdx; 2939 Vec diagV, offdiagV; 2940 PetscScalar *a, *diagA, *offdiagA; 2941 PetscInt r; 2942 PetscErrorCode ierr; 2943 2944 PetscFunctionBegin; 2945 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2946 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2947 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 2948 ierr = 
MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2949 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2950 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2951 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2952 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2953 for (r = 0; r < n; ++r) { 2954 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2955 a[r] = diagA[r]; 2956 idx[r] = cstart + diagIdx[r]; 2957 } else { 2958 a[r] = offdiagA[r]; 2959 idx[r] = cmap[offdiagIdx[r]]; 2960 } 2961 } 2962 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2963 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2964 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2965 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2966 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2967 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2968 PetscFunctionReturn(0); 2969 } 2970 2971 #undef __FUNCT__ 2972 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2973 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2974 { 2975 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2976 PetscInt n = A->rmap->n; 2977 PetscInt cstart = A->cmap->rstart; 2978 PetscInt *cmap = mat->garray; 2979 PetscInt *diagIdx, *offdiagIdx; 2980 Vec diagV, offdiagV; 2981 PetscScalar *a, *diagA, *offdiagA; 2982 PetscInt r; 2983 PetscErrorCode ierr; 2984 2985 PetscFunctionBegin; 2986 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2987 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2988 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2989 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2990 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2991 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2992 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2993 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2994 for (r = 0; r < n; ++r) { 2995 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2996 a[r] = diagA[r]; 2997 idx[r] = 
cstart + diagIdx[r]; 2998 } else { 2999 a[r] = offdiagA[r]; 3000 idx[r] = cmap[offdiagIdx[r]]; 3001 } 3002 } 3003 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3004 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3005 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3006 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3007 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3008 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3009 PetscFunctionReturn(0); 3010 } 3011 3012 #undef __FUNCT__ 3013 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 3014 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 3015 { 3016 PetscErrorCode ierr; 3017 Mat *dummy; 3018 3019 PetscFunctionBegin; 3020 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 3021 *newmat = *dummy; 3022 ierr = PetscFree(dummy);CHKERRQ(ierr); 3023 PetscFunctionReturn(0); 3024 } 3025 3026 #undef __FUNCT__ 3027 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3028 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3029 { 3030 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3031 PetscErrorCode ierr; 3032 3033 PetscFunctionBegin; 3034 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3035 PetscFunctionReturn(0); 3036 } 3037 3038 #undef __FUNCT__ 3039 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3040 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3041 { 3042 PetscErrorCode ierr; 3043 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3044 3045 PetscFunctionBegin; 3046 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3047 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3048 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3049 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3050 PetscFunctionReturn(0); 3051 } 3052 3053 /* -------------------------------------------------------------------*/ 3054 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3055 MatGetRow_MPIAIJ, 3056 
MatRestoreRow_MPIAIJ, 3057 MatMult_MPIAIJ, 3058 /* 4*/ MatMultAdd_MPIAIJ, 3059 MatMultTranspose_MPIAIJ, 3060 MatMultTransposeAdd_MPIAIJ, 3061 #if defined(PETSC_HAVE_PBGL) 3062 MatSolve_MPIAIJ, 3063 #else 3064 0, 3065 #endif 3066 0, 3067 0, 3068 /*10*/ 0, 3069 0, 3070 0, 3071 MatSOR_MPIAIJ, 3072 MatTranspose_MPIAIJ, 3073 /*15*/ MatGetInfo_MPIAIJ, 3074 MatEqual_MPIAIJ, 3075 MatGetDiagonal_MPIAIJ, 3076 MatDiagonalScale_MPIAIJ, 3077 MatNorm_MPIAIJ, 3078 /*20*/ MatAssemblyBegin_MPIAIJ, 3079 MatAssemblyEnd_MPIAIJ, 3080 MatSetOption_MPIAIJ, 3081 MatZeroEntries_MPIAIJ, 3082 /*24*/ MatZeroRows_MPIAIJ, 3083 0, 3084 #if defined(PETSC_HAVE_PBGL) 3085 0, 3086 #else 3087 0, 3088 #endif 3089 0, 3090 0, 3091 /*29*/ MatSetUp_MPIAIJ, 3092 #if defined(PETSC_HAVE_PBGL) 3093 0, 3094 #else 3095 0, 3096 #endif 3097 0, 3098 0, 3099 0, 3100 /*34*/ MatDuplicate_MPIAIJ, 3101 0, 3102 0, 3103 0, 3104 0, 3105 /*39*/ MatAXPY_MPIAIJ, 3106 MatGetSubMatrices_MPIAIJ, 3107 MatIncreaseOverlap_MPIAIJ, 3108 MatGetValues_MPIAIJ, 3109 MatCopy_MPIAIJ, 3110 /*44*/ MatGetRowMax_MPIAIJ, 3111 MatScale_MPIAIJ, 3112 0, 3113 0, 3114 MatZeroRowsColumns_MPIAIJ, 3115 /*49*/ MatSetRandom_MPIAIJ, 3116 0, 3117 0, 3118 0, 3119 0, 3120 /*54*/ MatFDColoringCreate_MPIXAIJ, 3121 0, 3122 MatSetUnfactored_MPIAIJ, 3123 MatPermute_MPIAIJ, 3124 0, 3125 /*59*/ MatGetSubMatrix_MPIAIJ, 3126 MatDestroy_MPIAIJ, 3127 MatView_MPIAIJ, 3128 0, 3129 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 3130 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 3131 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 3132 0, 3133 0, 3134 0, 3135 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3136 MatGetRowMinAbs_MPIAIJ, 3137 0, 3138 MatSetColoring_MPIAIJ, 3139 0, 3140 MatSetValuesAdifor_MPIAIJ, 3141 /*75*/ MatFDColoringApply_AIJ, 3142 0, 3143 0, 3144 0, 3145 MatFindZeroDiagonals_MPIAIJ, 3146 /*80*/ 0, 3147 0, 3148 0, 3149 /*83*/ MatLoad_MPIAIJ, 3150 0, 3151 0, 3152 0, 3153 0, 3154 0, 3155 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3156 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3157 
MatMatMultNumeric_MPIAIJ_MPIAIJ, 3158 MatPtAP_MPIAIJ_MPIAIJ, 3159 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3160 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 3161 0, 3162 0, 3163 0, 3164 0, 3165 /*99*/ 0, 3166 0, 3167 0, 3168 MatConjugate_MPIAIJ, 3169 0, 3170 /*104*/MatSetValuesRow_MPIAIJ, 3171 MatRealPart_MPIAIJ, 3172 MatImaginaryPart_MPIAIJ, 3173 0, 3174 0, 3175 /*109*/0, 3176 MatGetRedundantMatrix_MPIAIJ, 3177 MatGetRowMin_MPIAIJ, 3178 0, 3179 0, 3180 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3181 0, 3182 0, 3183 0, 3184 0, 3185 /*119*/0, 3186 0, 3187 0, 3188 0, 3189 MatGetMultiProcBlock_MPIAIJ, 3190 /*124*/MatFindNonzeroRows_MPIAIJ, 3191 MatGetColumnNorms_MPIAIJ, 3192 MatInvertBlockDiagonal_MPIAIJ, 3193 0, 3194 MatGetSubMatricesParallel_MPIAIJ, 3195 /*129*/0, 3196 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3197 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3198 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3199 0, 3200 /*134*/0, 3201 0, 3202 0, 3203 0, 3204 0, 3205 /*139*/0, 3206 0, 3207 0, 3208 MatFDColoringSetUp_MPIXAIJ 3209 }; 3210 3211 /* ----------------------------------------------------------------------------------------*/ 3212 3213 #undef __FUNCT__ 3214 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3215 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3216 { 3217 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3218 PetscErrorCode ierr; 3219 3220 PetscFunctionBegin; 3221 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3222 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3223 PetscFunctionReturn(0); 3224 } 3225 3226 #undef __FUNCT__ 3227 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3228 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3229 { 3230 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3231 PetscErrorCode ierr; 3232 3233 PetscFunctionBegin; 3234 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3235 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3236 PetscFunctionReturn(0); 3237 } 3238 3239 #undef __FUNCT__ 3240 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3241 PetscErrorCode 
MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3242 { 3243 Mat_MPIAIJ *b; 3244 PetscErrorCode ierr; 3245 3246 PetscFunctionBegin; 3247 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3248 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3249 b = (Mat_MPIAIJ*)B->data; 3250 3251 if (!B->preallocated) { 3252 /* Explicitly create 2 MATSEQAIJ matrices. */ 3253 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3254 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3255 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 3256 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3257 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 3258 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3259 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3260 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 3261 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3262 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 3263 } 3264 3265 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3266 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3267 B->preallocated = PETSC_TRUE; 3268 PetscFunctionReturn(0); 3269 } 3270 3271 #undef __FUNCT__ 3272 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3273 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3274 { 3275 Mat mat; 3276 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3277 PetscErrorCode ierr; 3278 3279 PetscFunctionBegin; 3280 *newmat = 0; 3281 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3282 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3283 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 3284 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3285 ierr = 
PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3286 a = (Mat_MPIAIJ*)mat->data; 3287 3288 mat->factortype = matin->factortype; 3289 mat->assembled = PETSC_TRUE; 3290 mat->insertmode = NOT_SET_VALUES; 3291 mat->preallocated = PETSC_TRUE; 3292 3293 a->size = oldmat->size; 3294 a->rank = oldmat->rank; 3295 a->donotstash = oldmat->donotstash; 3296 a->roworiented = oldmat->roworiented; 3297 a->rowindices = 0; 3298 a->rowvalues = 0; 3299 a->getrowactive = PETSC_FALSE; 3300 3301 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3302 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3303 3304 if (oldmat->colmap) { 3305 #if defined(PETSC_USE_CTABLE) 3306 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3307 #else 3308 ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr); 3309 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3310 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3311 #endif 3312 } else a->colmap = 0; 3313 if (oldmat->garray) { 3314 PetscInt len; 3315 len = oldmat->B->cmap->n; 3316 ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr); 3317 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3318 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3319 } else a->garray = 0; 3320 3321 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3322 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 3323 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3324 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 3325 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3326 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3327 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3328 ierr = 
PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3329 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3330 *newmat = mat; 3331 PetscFunctionReturn(0); 3332 } 3333 3334 3335 3336 #undef __FUNCT__ 3337 #define __FUNCT__ "MatLoad_MPIAIJ" 3338 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3339 { 3340 PetscScalar *vals,*svals; 3341 MPI_Comm comm; 3342 PetscErrorCode ierr; 3343 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3344 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3345 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3346 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 3347 PetscInt cend,cstart,n,*rowners,sizesset=1; 3348 int fd; 3349 PetscInt bs = 1; 3350 3351 PetscFunctionBegin; 3352 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3353 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3354 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3355 if (!rank) { 3356 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3357 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3358 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3359 } 3360 3361 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3362 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3363 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3364 3365 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3366 3367 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3368 M = header[1]; N = header[2]; 3369 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3370 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3371 if (sizesset && newMat->cmap->N < 0) 
newMat->cmap->N = N; 3372 3373 /* If global sizes are set, check if they are consistent with that given in the file */ 3374 if (sizesset) { 3375 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3376 } 3377 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3378 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3379 3380 /* determine ownership of all (block) rows */ 3381 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3382 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3383 else m = newMat->rmap->n; /* Set by user */ 3384 3385 ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr); 3386 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3387 3388 /* First process needs enough room for process with most rows */ 3389 if (!rank) { 3390 mmax = rowners[1]; 3391 for (i=2; i<=size; i++) { 3392 mmax = PetscMax(mmax, rowners[i]); 3393 } 3394 } else mmax = -1; /* unused, but compilers complain */ 3395 3396 rowners[0] = 0; 3397 for (i=2; i<=size; i++) { 3398 rowners[i] += rowners[i-1]; 3399 } 3400 rstart = rowners[rank]; 3401 rend = rowners[rank+1]; 3402 3403 /* distribute row lengths to all processors */ 3404 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 3405 if (!rank) { 3406 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3407 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 3408 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 3409 for (j=0; j<m; j++) { 3410 procsnz[0] += ourlens[j]; 3411 } 3412 for (i=1; i<size; i++) { 3413 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3414 /* calculate the number of 
nonzeros on each processor */ 3415 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3416 procsnz[i] += rowlengths[j]; 3417 } 3418 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3419 } 3420 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3421 } else { 3422 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3423 } 3424 3425 if (!rank) { 3426 /* determine max buffer needed and allocate it */ 3427 maxnz = 0; 3428 for (i=0; i<size; i++) { 3429 maxnz = PetscMax(maxnz,procsnz[i]); 3430 } 3431 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 3432 3433 /* read in my part of the matrix column indices */ 3434 nz = procsnz[0]; 3435 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3436 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3437 3438 /* read in every one elses and ship off */ 3439 for (i=1; i<size; i++) { 3440 nz = procsnz[i]; 3441 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3442 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3443 } 3444 ierr = PetscFree(cols);CHKERRQ(ierr); 3445 } else { 3446 /* determine buffer space needed for message */ 3447 nz = 0; 3448 for (i=0; i<m; i++) { 3449 nz += ourlens[i]; 3450 } 3451 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3452 3453 /* receive message of column indices*/ 3454 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3455 } 3456 3457 /* determine column ownership if matrix is not square */ 3458 if (N != M) { 3459 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3460 else n = newMat->cmap->n; 3461 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3462 cstart = cend - n; 3463 } else { 3464 cstart = rstart; 3465 cend = rend; 3466 n = cend - cstart; 3467 } 3468 3469 /* loop over local rows, determining number of off diagonal entries */ 3470 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3471 jj = 0; 3472 for (i=0; i<m; i++) { 3473 for (j=0; j<ourlens[i]; j++) { 3474 if 
(mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3475 jj++; 3476 } 3477 } 3478 3479 for (i=0; i<m; i++) { 3480 ourlens[i] -= offlens[i]; 3481 } 3482 if (!sizesset) { 3483 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3484 } 3485 3486 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3487 3488 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3489 3490 for (i=0; i<m; i++) { 3491 ourlens[i] += offlens[i]; 3492 } 3493 3494 if (!rank) { 3495 ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr); 3496 3497 /* read in my part of the matrix numerical values */ 3498 nz = procsnz[0]; 3499 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3500 3501 /* insert into matrix */ 3502 jj = rstart; 3503 smycols = mycols; 3504 svals = vals; 3505 for (i=0; i<m; i++) { 3506 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3507 smycols += ourlens[i]; 3508 svals += ourlens[i]; 3509 jj++; 3510 } 3511 3512 /* read in other processors and ship out */ 3513 for (i=1; i<size; i++) { 3514 nz = procsnz[i]; 3515 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3516 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3517 } 3518 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3519 } else { 3520 /* receive numeric values */ 3521 ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr); 3522 3523 /* receive message of values*/ 3524 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3525 3526 /* insert into matrix */ 3527 jj = rstart; 3528 smycols = mycols; 3529 svals = vals; 3530 for (i=0; i<m; i++) { 3531 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3532 smycols += ourlens[i]; 3533 svals += ourlens[i]; 3534 jj++; 3535 } 3536 } 3537 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3538 ierr = PetscFree(vals);CHKERRQ(ierr); 3539 ierr = PetscFree(mycols);CHKERRQ(ierr); 
3540 ierr = PetscFree(rowners);CHKERRQ(ierr); 3541 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3542 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3543 PetscFunctionReturn(0); 3544 } 3545 3546 #undef __FUNCT__ 3547 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3548 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3549 { 3550 PetscErrorCode ierr; 3551 IS iscol_local; 3552 PetscInt csize; 3553 3554 PetscFunctionBegin; 3555 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3556 if (call == MAT_REUSE_MATRIX) { 3557 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3558 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3559 } else { 3560 PetscInt cbs; 3561 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3562 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3563 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3564 } 3565 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3566 if (call == MAT_INITIAL_MATRIX) { 3567 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3568 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3569 } 3570 PetscFunctionReturn(0); 3571 } 3572 3573 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3574 #undef __FUNCT__ 3575 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3576 /* 3577 Not great since it makes two copies of the submatrix, first an SeqAIJ 3578 in local and then by concatenating the local matrices the end result. 3579 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3580 3581 Note: This requires a sequential iscol with all indices. 
3582 */ 3583 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3584 { 3585 PetscErrorCode ierr; 3586 PetscMPIInt rank,size; 3587 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3588 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3589 PetscBool allcolumns, colflag; 3590 Mat M,Mreuse; 3591 MatScalar *vwork,*aa; 3592 MPI_Comm comm; 3593 Mat_SeqAIJ *aij; 3594 3595 PetscFunctionBegin; 3596 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3597 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3598 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3599 3600 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3601 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3602 if (colflag && ncol == mat->cmap->N) { 3603 allcolumns = PETSC_TRUE; 3604 } else { 3605 allcolumns = PETSC_FALSE; 3606 } 3607 if (call == MAT_REUSE_MATRIX) { 3608 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3609 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3610 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3611 } else { 3612 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3613 } 3614 3615 /* 3616 m - number of local rows 3617 n - number of columns (same on all processors) 3618 rstart - first row in new global matrix generated 3619 */ 3620 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3621 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3622 if (call == MAT_INITIAL_MATRIX) { 3623 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3624 ii = aij->i; 3625 jj = aij->j; 3626 3627 /* 3628 Determine the number of non-zeros in the diagonal and off-diagonal 3629 portions of the matrix in order to do correct preallocation 3630 */ 3631 3632 /* first get start and end of "diagonal" 
columns */ 3633 if (csize == PETSC_DECIDE) { 3634 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3635 if (mglobal == n) { /* square matrix */ 3636 nlocal = m; 3637 } else { 3638 nlocal = n/size + ((n % size) > rank); 3639 } 3640 } else { 3641 nlocal = csize; 3642 } 3643 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3644 rstart = rend - nlocal; 3645 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3646 3647 /* next, compute all the lengths */ 3648 ierr = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr); 3649 olens = dlens + m; 3650 for (i=0; i<m; i++) { 3651 jend = ii[i+1] - ii[i]; 3652 olen = 0; 3653 dlen = 0; 3654 for (j=0; j<jend; j++) { 3655 if (*jj < rstart || *jj >= rend) olen++; 3656 else dlen++; 3657 jj++; 3658 } 3659 olens[i] = olen; 3660 dlens[i] = dlen; 3661 } 3662 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3663 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3664 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3665 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3666 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3667 ierr = PetscFree(dlens);CHKERRQ(ierr); 3668 } else { 3669 PetscInt ml,nl; 3670 3671 M = *newmat; 3672 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3673 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3674 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3675 /* 3676 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3677 rather than the slower MatSetValues(). 
3678 */ 3679 M->was_assembled = PETSC_TRUE; 3680 M->assembled = PETSC_FALSE; 3681 } 3682 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3683 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3684 ii = aij->i; 3685 jj = aij->j; 3686 aa = aij->a; 3687 for (i=0; i<m; i++) { 3688 row = rstart + i; 3689 nz = ii[i+1] - ii[i]; 3690 cwork = jj; jj += nz; 3691 vwork = aa; aa += nz; 3692 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3693 } 3694 3695 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3696 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3697 *newmat = M; 3698 3699 /* save submatrix used in processor for next request */ 3700 if (call == MAT_INITIAL_MATRIX) { 3701 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3702 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3703 } 3704 PetscFunctionReturn(0); 3705 } 3706 3707 #undef __FUNCT__ 3708 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3709 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3710 { 3711 PetscInt m,cstart, cend,j,nnz,i,d; 3712 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3713 const PetscInt *JJ; 3714 PetscScalar *values; 3715 PetscErrorCode ierr; 3716 3717 PetscFunctionBegin; 3718 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3719 3720 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3721 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3722 m = B->rmap->n; 3723 cstart = B->cmap->rstart; 3724 cend = B->cmap->rend; 3725 rstart = B->rmap->rstart; 3726 3727 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3728 3729 #if defined(PETSC_USE_DEBUGGING) 3730 for (i=0; i<m; i++) { 3731 nnz = Ii[i+1]- Ii[i]; 3732 JJ = J + Ii[i]; 3733 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3734 if (nnz && (JJ[0] < 0)) 
SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 3735 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3736 } 3737 #endif 3738 3739 for (i=0; i<m; i++) { 3740 nnz = Ii[i+1]- Ii[i]; 3741 JJ = J + Ii[i]; 3742 nnz_max = PetscMax(nnz_max,nnz); 3743 d = 0; 3744 for (j=0; j<nnz; j++) { 3745 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3746 } 3747 d_nnz[i] = d; 3748 o_nnz[i] = nnz - d; 3749 } 3750 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3751 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3752 3753 if (v) values = (PetscScalar*)v; 3754 else { 3755 ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr); 3756 } 3757 3758 for (i=0; i<m; i++) { 3759 ii = i + rstart; 3760 nnz = Ii[i+1]- Ii[i]; 3761 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3762 } 3763 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3764 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3765 3766 if (!v) { 3767 ierr = PetscFree(values);CHKERRQ(ierr); 3768 } 3769 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3770 PetscFunctionReturn(0); 3771 } 3772 3773 #undef __FUNCT__ 3774 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3775 /*@ 3776 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3777 (the default parallel PETSc format). 3778 3779 Collective on MPI_Comm 3780 3781 Input Parameters: 3782 + B - the matrix 3783 . i - the indices into j for the start of each local row (starts with zero) 3784 . 
j - the column indices for each local row (starts with zero) 3785 - v - optional values in the matrix 3786 3787 Level: developer 3788 3789 Notes: 3790 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3791 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3792 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3793 3794 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3795 3796 The format which is used for the sparse matrix input, is equivalent to a 3797 row-major ordering.. i.e for the following matrix, the input data expected is 3798 as shown: 3799 3800 1 0 0 3801 2 0 3 P0 3802 ------- 3803 4 5 6 P1 3804 3805 Process0 [P0]: rows_owned=[0,1] 3806 i = {0,1,3} [size = nrow+1 = 2+1] 3807 j = {0,0,2} [size = nz = 6] 3808 v = {1,2,3} [size = nz = 6] 3809 3810 Process1 [P1]: rows_owned=[2] 3811 i = {0,3} [size = nrow+1 = 1+1] 3812 j = {0,1,2} [size = nz = 6] 3813 v = {4,5,6} [size = nz = 6] 3814 3815 .keywords: matrix, aij, compressed row, sparse, parallel 3816 3817 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3818 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3819 @*/ 3820 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3821 { 3822 PetscErrorCode ierr; 3823 3824 PetscFunctionBegin; 3825 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3826 PetscFunctionReturn(0); 3827 } 3828 3829 #undef __FUNCT__ 3830 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3831 /*@C 3832 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3833 (the default parallel PETSc format). 
For good matrix assembly performance 3834 the user should preallocate the matrix storage by setting the parameters 3835 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3836 performance can be increased by more than a factor of 50. 3837 3838 Collective on MPI_Comm 3839 3840 Input Parameters: 3841 + B - the matrix 3842 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3843 (same value is used for all local rows) 3844 . d_nnz - array containing the number of nonzeros in the various rows of the 3845 DIAGONAL portion of the local submatrix (possibly different for each row) 3846 or NULL, if d_nz is used to specify the nonzero structure. 3847 The size of this array is equal to the number of local rows, i.e 'm'. 3848 For matrices that will be factored, you must leave room for (and set) 3849 the diagonal entry even if it is zero. 3850 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3851 submatrix (same value is used for all local rows). 3852 - o_nnz - array containing the number of nonzeros in the various rows of the 3853 OFF-DIAGONAL portion of the local submatrix (possibly different for 3854 each row) or NULL, if o_nz is used to specify the nonzero 3855 structure. The size of this array is equal to the number 3856 of local rows, i.e 'm'. 3857 3858 If the *_nnz parameter is given then the *_nz parameter is ignored 3859 3860 The AIJ format (also called the Yale sparse matrix format or 3861 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3862 storage. The stored row and column indices begin with zero. 3863 See Users-Manual: ch_mat for details. 3864 3865 The parallel matrix is partitioned such that the first m0 rows belong to 3866 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3867 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence pre-allocation is perfect.
3954 3955 Level: intermediate 3956 3957 .keywords: matrix, aij, compressed row, sparse, parallel 3958 3959 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 3960 MPIAIJ, MatGetInfo(), PetscSplitOwnership() 3961 @*/ 3962 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3963 { 3964 PetscErrorCode ierr; 3965 3966 PetscFunctionBegin; 3967 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3968 PetscValidType(B,1); 3969 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3970 PetscFunctionReturn(0); 3971 } 3972 3973 #undef __FUNCT__ 3974 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3975 /*@ 3976 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3977 CSR format the local rows. 3978 3979 Collective on MPI_Comm 3980 3981 Input Parameters: 3982 + comm - MPI communicator 3983 . m - number of local rows (Cannot be PETSC_DECIDE) 3984 . n - This value should be the same as the local size used in creating the 3985 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3986 calculated if N is given) For square matrices n is almost always m. 3987 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3988 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3989 . i - row indices 3990 . j - column indices 3991 - a - matrix values 3992 3993 Output Parameter: 3994 . mat - the matrix 3995 3996 Level: intermediate 3997 3998 Notes: 3999 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4000 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4001 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 
4002 4003 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4004 4005 The format which is used for the sparse matrix input, is equivalent to a 4006 row-major ordering.. i.e for the following matrix, the input data expected is 4007 as shown: 4008 4009 1 0 0 4010 2 0 3 P0 4011 ------- 4012 4 5 6 P1 4013 4014 Process0 [P0]: rows_owned=[0,1] 4015 i = {0,1,3} [size = nrow+1 = 2+1] 4016 j = {0,0,2} [size = nz = 6] 4017 v = {1,2,3} [size = nz = 6] 4018 4019 Process1 [P1]: rows_owned=[2] 4020 i = {0,3} [size = nrow+1 = 1+1] 4021 j = {0,1,2} [size = nz = 6] 4022 v = {4,5,6} [size = nz = 6] 4023 4024 .keywords: matrix, aij, compressed row, sparse, parallel 4025 4026 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4027 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4028 @*/ 4029 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4030 { 4031 PetscErrorCode ierr; 4032 4033 PetscFunctionBegin; 4034 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4035 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4036 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4037 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4038 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4039 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4040 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4041 PetscFunctionReturn(0); 4042 } 4043 4044 #undef __FUNCT__ 4045 #define __FUNCT__ "MatCreateAIJ" 4046 /*@C 4047 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4048 (the default parallel PETSc format). 
For good matrix assembly performance 4049 the user should preallocate the matrix storage by setting the parameters 4050 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4051 performance can be increased by more than a factor of 50. 4052 4053 Collective on MPI_Comm 4054 4055 Input Parameters: 4056 + comm - MPI communicator 4057 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4058 This value should be the same as the local size used in creating the 4059 y vector for the matrix-vector product y = Ax. 4060 . n - This value should be the same as the local size used in creating the 4061 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4062 calculated if N is given) For square matrices n is almost always m. 4063 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4064 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4065 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4066 (same value is used for all local rows) 4067 . d_nnz - array containing the number of nonzeros in the various rows of the 4068 DIAGONAL portion of the local submatrix (possibly different for each row) 4069 or NULL, if d_nz is used to specify the nonzero structure. 4070 The size of this array is equal to the number of local rows, i.e 'm'. 4071 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4072 submatrix (same value is used for all local rows). 4073 - o_nnz - array containing the number of nonzeros in the various rows of the 4074 OFF-DIAGONAL portion of the local submatrix (possibly different for 4075 each row) or NULL, if o_nz is used to specify the nonzero 4076 structure. The size of this array is equal to the number 4077 of local rows, i.e 'm'. 4078 4079 Output Parameter: 4080 . 
A - the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2 etc.. where
   m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
   values corresponding to [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to 0th partition, the next n1 columns belonging to the next
   partition etc.. where n0,n1,n2... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor. i.e diagonal matrix on
   process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below better
   illustrates this concept.
4117 4118 For a square global matrix we define each processor's diagonal portion 4119 to be its local rows and the corresponding columns (a square submatrix); 4120 each processor's off-diagonal portion encompasses the remainder of the 4121 local matrix (a rectangular submatrix). 4122 4123 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4124 4125 When calling this routine with a single process communicator, a matrix of 4126 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4127 type of communicator, use the construction mechanism: 4128 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4129 4130 By default, this format uses inodes (identical nodes) when possible. 4131 We search for consecutive rows with the same nonzero structure, thereby 4132 reusing matrix information to achieve increased efficiency. 4133 4134 Options Database Keys: 4135 + -mat_no_inode - Do not use inodes 4136 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4137 - -mat_aij_oneindex - Internally use indexing starting at 1 4138 rather than 0. Note that when calling MatSetValues(), 4139 the user still MUST index entries starting at 0! 4140 4141 4142 Example usage: 4143 4144 Consider the following 8x8 matrix with 34 non-zero values, that is 4145 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4146 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2.
i.e we are using 12+15+10=37 storage locations to store 4198 34 values. 4199 4200 When d_nnz, o_nnz parameters are specified, the storage is specified 4201 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4202 In the above case the values for d_nnz,o_nnz are: 4203 .vb 4204 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4205 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4206 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4207 .ve 4208 Here the space allocated is sum of all the above values i.e 34, and 4209 hence pre-allocation is perfect. 4210 4211 Level: intermediate 4212 4213 .keywords: matrix, aij, compressed row, sparse, parallel 4214 4215 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4216 MPIAIJ, MatCreateMPIAIJWithArrays() 4217 @*/ 4218 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4219 { 4220 PetscErrorCode ierr; 4221 PetscMPIInt size; 4222 4223 PetscFunctionBegin; 4224 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4225 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4226 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4227 if (size > 1) { 4228 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4229 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4230 } else { 4231 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4232 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4233 } 4234 PetscFunctionReturn(0); 4235 } 4236 4237 #undef __FUNCT__ 4238 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4239 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4240 { 4241 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4242 4243 PetscFunctionBegin; 4244 if (Ad) *Ad = a->A; 4245 if (Ao) *Ao = a->B; 4246 if (colmap) *colmap = a->garray; 4247 PetscFunctionReturn(0); 4248 } 4249 4250 #undef __FUNCT__ 4251 #define __FUNCT__ 
"MatSetColoring_MPIAIJ" 4252 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4253 { 4254 PetscErrorCode ierr; 4255 PetscInt i; 4256 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4257 4258 PetscFunctionBegin; 4259 if (coloring->ctype == IS_COLORING_GLOBAL) { 4260 ISColoringValue *allcolors,*colors; 4261 ISColoring ocoloring; 4262 4263 /* set coloring for diagonal portion */ 4264 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4265 4266 /* set coloring for off-diagonal portion */ 4267 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 4268 ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr); 4269 for (i=0; i<a->B->cmap->n; i++) { 4270 colors[i] = allcolors[a->garray[i]]; 4271 } 4272 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4273 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4274 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4275 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4276 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4277 ISColoringValue *colors; 4278 PetscInt *larray; 4279 ISColoring ocoloring; 4280 4281 /* set coloring for diagonal portion */ 4282 ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr); 4283 for (i=0; i<a->A->cmap->n; i++) { 4284 larray[i] = i + A->cmap->rstart; 4285 } 4286 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 4287 ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr); 4288 for (i=0; i<a->A->cmap->n; i++) { 4289 colors[i] = coloring->colors[larray[i]]; 4290 } 4291 ierr = PetscFree(larray);CHKERRQ(ierr); 4292 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4293 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4294 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4295 4296 /* set coloring for off-diagonal portion */ 
4297 ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr); 4298 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 4299 ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr); 4300 for (i=0; i<a->B->cmap->n; i++) { 4301 colors[i] = coloring->colors[larray[i]]; 4302 } 4303 ierr = PetscFree(larray);CHKERRQ(ierr); 4304 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4305 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4306 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4307 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4308 PetscFunctionReturn(0); 4309 } 4310 4311 #undef __FUNCT__ 4312 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4313 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4314 { 4315 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4316 PetscErrorCode ierr; 4317 4318 PetscFunctionBegin; 4319 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4320 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4321 PetscFunctionReturn(0); 4322 } 4323 4324 #undef __FUNCT__ 4325 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4326 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4327 { 4328 PetscErrorCode ierr; 4329 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4330 PetscInt *indx; 4331 4332 PetscFunctionBegin; 4333 /* This routine will ONLY return MPIAIJ type matrix */ 4334 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4335 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4336 if (n == PETSC_DECIDE) { 4337 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4338 } 4339 /* Check sum(n) = N */ 4340 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4341 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != 
global columns %d",N); 4342 4343 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4344 rstart -= m; 4345 4346 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4347 for (i=0; i<m; i++) { 4348 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4349 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4350 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4351 } 4352 4353 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4354 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4355 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4356 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4357 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4358 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4359 PetscFunctionReturn(0); 4360 } 4361 4362 #undef __FUNCT__ 4363 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4364 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4365 { 4366 PetscErrorCode ierr; 4367 PetscInt m,N,i,rstart,nnz,Ii; 4368 PetscInt *indx; 4369 PetscScalar *values; 4370 4371 PetscFunctionBegin; 4372 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4373 ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr); 4374 for (i=0; i<m; i++) { 4375 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4376 Ii = i + rstart; 4377 ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4378 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4379 } 4380 ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4381 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4382 PetscFunctionReturn(0); 4383 } 4384 4385 #undef __FUNCT__ 4386 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4387 /*@ 4388 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4389 
matrices from each processor 4390 4391 Collective on MPI_Comm 4392 4393 Input Parameters: 4394 + comm - the communicators the parallel matrix will live on 4395 . inmat - the input sequential matrices 4396 . n - number of local columns (or PETSC_DECIDE) 4397 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4398 4399 Output Parameter: 4400 . outmat - the parallel matrix generated 4401 4402 Level: advanced 4403 4404 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4405 4406 @*/ 4407 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4408 { 4409 PetscErrorCode ierr; 4410 PetscMPIInt size; 4411 4412 PetscFunctionBegin; 4413 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4414 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4415 if (size == 1) { 4416 if (scall == MAT_INITIAL_MATRIX) { 4417 ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr); 4418 } else { 4419 ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4420 } 4421 } else { 4422 if (scall == MAT_INITIAL_MATRIX) { 4423 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4424 } 4425 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4426 } 4427 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4428 PetscFunctionReturn(0); 4429 } 4430 4431 #undef __FUNCT__ 4432 #define __FUNCT__ "MatFileSplit" 4433 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4434 { 4435 PetscErrorCode ierr; 4436 PetscMPIInt rank; 4437 PetscInt m,N,i,rstart,nnz; 4438 size_t len; 4439 const PetscInt *indx; 4440 PetscViewer out; 4441 char *name; 4442 Mat B; 4443 const PetscScalar *values; 4444 4445 PetscFunctionBegin; 4446 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4447 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4448 /* Should this be the type of the diagonal block of A? 
*/ 4449 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4450 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4451 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4452 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4453 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4454 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4455 for (i=0; i<m; i++) { 4456 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4457 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4458 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4459 } 4460 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4461 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4462 4463 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4464 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4465 ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr); 4466 sprintf(name,"%s.%d",outfile,rank); 4467 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4468 ierr = PetscFree(name);CHKERRQ(ierr); 4469 ierr = MatView(B,out);CHKERRQ(ierr); 4470 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4471 ierr = MatDestroy(&B);CHKERRQ(ierr); 4472 PetscFunctionReturn(0); 4473 } 4474 4475 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4476 #undef __FUNCT__ 4477 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4478 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4479 { 4480 PetscErrorCode ierr; 4481 Mat_Merge_SeqsToMPI *merge; 4482 PetscContainer container; 4483 4484 PetscFunctionBegin; 4485 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4486 if (container) { 4487 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4488 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4489 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4490 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4491 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4492 ierr = 
PetscFree(merge->bj);CHKERRQ(ierr); 4493 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4494 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4495 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4496 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4497 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4498 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4499 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4500 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4501 ierr = PetscFree(merge);CHKERRQ(ierr); 4502 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4503 } 4504 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4505 PetscFunctionReturn(0); 4506 } 4507 4508 #include <../src/mat/utils/freespace.h> 4509 #include <petscbt.h> 4510 4511 #undef __FUNCT__ 4512 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4513 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4514 { 4515 PetscErrorCode ierr; 4516 MPI_Comm comm; 4517 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4518 PetscMPIInt size,rank,taga,*len_s; 4519 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4520 PetscInt proc,m; 4521 PetscInt **buf_ri,**buf_rj; 4522 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4523 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4524 MPI_Request *s_waits,*r_waits; 4525 MPI_Status *status; 4526 MatScalar *aa=a->a; 4527 MatScalar **abuf_r,*ba_i; 4528 Mat_Merge_SeqsToMPI *merge; 4529 PetscContainer container; 4530 4531 PetscFunctionBegin; 4532 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4533 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4534 4535 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4536 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4537 4538 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4539 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4540 4541 bi = merge->bi; 4542 bj = merge->bj; 4543 buf_ri 
= merge->buf_ri; 4544 buf_rj = merge->buf_rj; 4545 4546 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4547 owners = merge->rowmap->range; 4548 len_s = merge->len_s; 4549 4550 /* send and recv matrix values */ 4551 /*-----------------------------*/ 4552 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4553 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4554 4555 ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr); 4556 for (proc=0,k=0; proc<size; proc++) { 4557 if (!len_s[proc]) continue; 4558 i = owners[proc]; 4559 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4560 k++; 4561 } 4562 4563 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4564 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4565 ierr = PetscFree(status);CHKERRQ(ierr); 4566 4567 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4568 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4569 4570 /* insert mat values of mpimat */ 4571 /*----------------------------*/ 4572 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4573 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4574 4575 for (k=0; k<merge->nrecv; k++) { 4576 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4577 nrows = *(buf_ri_k[k]); 4578 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4579 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4580 } 4581 4582 /* set values of ba */ 4583 m = merge->rowmap->n; 4584 for (i=0; i<m; i++) { 4585 arow = owners[rank] + i; 4586 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4587 bnzi = bi[i+1] - bi[i]; 4588 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4589 4590 /* add local non-zero vals of this proc's seqmat into ba */ 4591 anzi = 
ai[arow+1] - ai[arow]; 4592 aj = a->j + ai[arow]; 4593 aa = a->a + ai[arow]; 4594 nextaj = 0; 4595 for (j=0; nextaj<anzi; j++) { 4596 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4597 ba_i[j] += aa[nextaj++]; 4598 } 4599 } 4600 4601 /* add received vals into ba */ 4602 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4603 /* i-th row */ 4604 if (i == *nextrow[k]) { 4605 anzi = *(nextai[k]+1) - *nextai[k]; 4606 aj = buf_rj[k] + *(nextai[k]); 4607 aa = abuf_r[k] + *(nextai[k]); 4608 nextaj = 0; 4609 for (j=0; nextaj<anzi; j++) { 4610 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4611 ba_i[j] += aa[nextaj++]; 4612 } 4613 } 4614 nextrow[k]++; nextai[k]++; 4615 } 4616 } 4617 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4618 } 4619 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4620 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4621 4622 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4623 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4624 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4625 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4626 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4627 PetscFunctionReturn(0); 4628 } 4629 4630 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4631 4632 #undef __FUNCT__ 4633 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4634 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4635 { 4636 PetscErrorCode ierr; 4637 Mat B_mpi; 4638 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4639 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4640 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4641 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4642 PetscInt len,proc,*dnz,*onz,bs,cbs; 4643 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4644 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4645 MPI_Request 
*si_waits,*sj_waits,*ri_waits,*rj_waits; 4646 MPI_Status *status; 4647 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4648 PetscBT lnkbt; 4649 Mat_Merge_SeqsToMPI *merge; 4650 PetscContainer container; 4651 4652 PetscFunctionBegin; 4653 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4654 4655 /* make sure it is a PETSc comm */ 4656 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4657 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4658 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4659 4660 ierr = PetscNew(&merge);CHKERRQ(ierr); 4661 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4662 4663 /* determine row ownership */ 4664 /*---------------------------------------------------------*/ 4665 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4666 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4667 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4668 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4669 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4670 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4671 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4672 4673 m = merge->rowmap->n; 4674 owners = merge->rowmap->range; 4675 4676 /* determine the number of messages to send, their lengths */ 4677 /*---------------------------------------------------------*/ 4678 len_s = merge->len_s; 4679 4680 len = 0; /* length of buf_si[] */ 4681 merge->nsend = 0; 4682 for (proc=0; proc<size; proc++) { 4683 len_si[proc] = 0; 4684 if (proc == rank) { 4685 len_s[proc] = 0; 4686 } else { 4687 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4688 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4689 } 4690 if (len_s[proc]) { 4691 merge->nsend++; 4692 nrows = 0; 4693 for (i=owners[proc]; i<owners[proc+1]; i++) { 4694 if (ai[i+1] > ai[i]) nrows++; 4695 } 4696 len_si[proc] = 2*(nrows+1); 4697 len += len_si[proc]; 4698 } 4699 } 4700 4701 
/* determine the number and length of messages to receive for ij-structure */ 4702 /*-------------------------------------------------------------------------*/ 4703 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4704 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4705 4706 /* post the Irecv of j-structure */ 4707 /*-------------------------------*/ 4708 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4709 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4710 4711 /* post the Isend of j-structure */ 4712 /*--------------------------------*/ 4713 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4714 4715 for (proc=0, k=0; proc<size; proc++) { 4716 if (!len_s[proc]) continue; 4717 i = owners[proc]; 4718 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4719 k++; 4720 } 4721 4722 /* receives and sends of j-structure are complete */ 4723 /*------------------------------------------------*/ 4724 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4725 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4726 4727 /* send and recv i-structure */ 4728 /*---------------------------*/ 4729 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4730 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4731 4732 ierr = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr); 4733 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4734 for (proc=0,k=0; proc<size; proc++) { 4735 if (!len_s[proc]) continue; 4736 /* form outgoing message for i-structure: 4737 buf_si[0]: nrows to be sent 4738 [1:nrows]: row index (global) 4739 [nrows+1:2*nrows+1]: i-structure index 4740 */ 4741 
/*-------------------------------------------*/ 4742 nrows = len_si[proc]/2 - 1; 4743 buf_si_i = buf_si + nrows+1; 4744 buf_si[0] = nrows; 4745 buf_si_i[0] = 0; 4746 nrows = 0; 4747 for (i=owners[proc]; i<owners[proc+1]; i++) { 4748 anzi = ai[i+1] - ai[i]; 4749 if (anzi) { 4750 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4751 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4752 nrows++; 4753 } 4754 } 4755 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4756 k++; 4757 buf_si += len_si[proc]; 4758 } 4759 4760 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4761 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4762 4763 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4764 for (i=0; i<merge->nrecv; i++) { 4765 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4766 } 4767 4768 ierr = PetscFree(len_si);CHKERRQ(ierr); 4769 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4770 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4771 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4772 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4773 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4774 ierr = PetscFree(status);CHKERRQ(ierr); 4775 4776 /* compute a local seq matrix in each processor */ 4777 /*----------------------------------------------*/ 4778 /* allocate bi array and free space for accumulating nonzero column info */ 4779 ierr = PetscMalloc1((m+1),&bi);CHKERRQ(ierr); 4780 bi[0] = 0; 4781 4782 /* create and initialize a linked list */ 4783 nlnk = N+1; 4784 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4785 4786 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4787 len = ai[owners[rank+1]] - ai[owners[rank]]; 4788 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4789 4790 current_space = free_space; 4791 
4792 /* determine symbolic info for each local row */ 4793 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4794 4795 for (k=0; k<merge->nrecv; k++) { 4796 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4797 nrows = *buf_ri_k[k]; 4798 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4799 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4800 } 4801 4802 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4803 len = 0; 4804 for (i=0; i<m; i++) { 4805 bnzi = 0; 4806 /* add local non-zero cols of this proc's seqmat into lnk */ 4807 arow = owners[rank] + i; 4808 anzi = ai[arow+1] - ai[arow]; 4809 aj = a->j + ai[arow]; 4810 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4811 bnzi += nlnk; 4812 /* add received col data into lnk */ 4813 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4814 if (i == *nextrow[k]) { /* i-th row */ 4815 anzi = *(nextai[k]+1) - *nextai[k]; 4816 aj = buf_rj[k] + *nextai[k]; 4817 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4818 bnzi += nlnk; 4819 nextrow[k]++; nextai[k]++; 4820 } 4821 } 4822 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4823 4824 /* if free space is not available, make more free space */ 4825 if (current_space->local_remaining<bnzi) { 4826 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4827 nspacedouble++; 4828 } 4829 /* copy data into free space, then initialize lnk */ 4830 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4831 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4832 4833 current_space->array += bnzi; 4834 current_space->local_used += bnzi; 4835 current_space->local_remaining -= bnzi; 4836 4837 bi[i+1] = bi[i] + bnzi; 4838 } 4839 4840 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 
4841 4842 ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr); 4843 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4844 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4845 4846 /* create symbolic parallel matrix B_mpi */ 4847 /*---------------------------------------*/ 4848 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4849 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4850 if (n==PETSC_DECIDE) { 4851 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4852 } else { 4853 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4854 } 4855 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4856 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4857 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4858 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4859 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4860 4861 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4862 B_mpi->assembled = PETSC_FALSE; 4863 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4864 merge->bi = bi; 4865 merge->bj = bj; 4866 merge->buf_ri = buf_ri; 4867 merge->buf_rj = buf_rj; 4868 merge->coi = NULL; 4869 merge->coj = NULL; 4870 merge->owners_co = NULL; 4871 4872 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4873 4874 /* attach the supporting struct to B_mpi for reuse */ 4875 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4876 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4877 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4878 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4879 *mpimat = B_mpi; 4880 4881 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4882 PetscFunctionReturn(0); 4883 } 4884 4885 #undef __FUNCT__ 4886 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4887 /*@C 4888 MatCreateMPIAIJSumSeqAIJ - 
Creates a MPIAIJ matrix by adding sequential 4889 matrices from each processor 4890 4891 Collective on MPI_Comm 4892 4893 Input Parameters: 4894 + comm - the communicators the parallel matrix will live on 4895 . seqmat - the input sequential matrices 4896 . m - number of local rows (or PETSC_DECIDE) 4897 . n - number of local columns (or PETSC_DECIDE) 4898 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4899 4900 Output Parameter: 4901 . mpimat - the parallel matrix generated 4902 4903 Level: advanced 4904 4905 Notes: 4906 The dimensions of the sequential matrix in each processor MUST be the same. 4907 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4908 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4909 @*/ 4910 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4911 { 4912 PetscErrorCode ierr; 4913 PetscMPIInt size; 4914 4915 PetscFunctionBegin; 4916 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4917 if (size == 1) { 4918 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4919 if (scall == MAT_INITIAL_MATRIX) { 4920 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4921 } else { 4922 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4923 } 4924 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4925 PetscFunctionReturn(0); 4926 } 4927 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4928 if (scall == MAT_INITIAL_MATRIX) { 4929 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4930 } 4931 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4932 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4933 PetscFunctionReturn(0); 4934 } 4935 4936 #undef __FUNCT__ 4937 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4938 /*@ 4939 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by 
taking all its local rows and putting them into a sequential vector with 4940 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4941 with MatGetSize() 4942 4943 Not Collective 4944 4945 Input Parameters: 4946 + A - the matrix 4947 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4948 4949 Output Parameter: 4950 . A_loc - the local sequential matrix generated 4951 4952 Level: developer 4953 4954 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4955 4956 @*/ 4957 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4958 { 4959 PetscErrorCode ierr; 4960 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4961 Mat_SeqAIJ *mat,*a,*b; 4962 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4963 MatScalar *aa,*ba,*cam; 4964 PetscScalar *ca; 4965 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4966 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4967 PetscBool match; 4968 4969 PetscFunctionBegin; 4970 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4971 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4972 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4973 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4974 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4975 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4976 aa = a->a; ba = b->a; 4977 if (scall == MAT_INITIAL_MATRIX) { 4978 ierr = PetscMalloc1((1+am),&ci);CHKERRQ(ierr); 4979 ci[0] = 0; 4980 for (i=0; i<am; i++) { 4981 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4982 } 4983 ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr); 4984 ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr); 4985 k = 0; 4986 for (i=0; i<am; i++) { 4987 ncols_o = bi[i+1] - bi[i]; 4988 ncols_d = ai[i+1] - ai[i]; 4989 /* off-diagonal portion of A */ 4990 for (jo=0; jo<ncols_o; jo++) { 4991 col = cmap[*bj]; 4992 if (col >= cstart) break; 4993 cj[k] = col; bj++; 
4994 ca[k++] = *ba++; 4995 } 4996 /* diagonal portion of A */ 4997 for (j=0; j<ncols_d; j++) { 4998 cj[k] = cstart + *aj++; 4999 ca[k++] = *aa++; 5000 } 5001 /* off-diagonal portion of A */ 5002 for (j=jo; j<ncols_o; j++) { 5003 cj[k] = cmap[*bj++]; 5004 ca[k++] = *ba++; 5005 } 5006 } 5007 /* put together the new matrix */ 5008 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5009 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5010 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5011 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5012 mat->free_a = PETSC_TRUE; 5013 mat->free_ij = PETSC_TRUE; 5014 mat->nonew = 0; 5015 } else if (scall == MAT_REUSE_MATRIX) { 5016 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5017 ci = mat->i; cj = mat->j; cam = mat->a; 5018 for (i=0; i<am; i++) { 5019 /* off-diagonal portion of A */ 5020 ncols_o = bi[i+1] - bi[i]; 5021 for (jo=0; jo<ncols_o; jo++) { 5022 col = cmap[*bj]; 5023 if (col >= cstart) break; 5024 *cam++ = *ba++; bj++; 5025 } 5026 /* diagonal portion of A */ 5027 ncols_d = ai[i+1] - ai[i]; 5028 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5029 /* off-diagonal portion of A */ 5030 for (j=jo; j<ncols_o; j++) { 5031 *cam++ = *ba++; bj++; 5032 } 5033 } 5034 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5035 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5036 PetscFunctionReturn(0); 5037 } 5038 5039 #undef __FUNCT__ 5040 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5041 /*@C 5042 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5043 5044 Not Collective 5045 5046 Input Parameters: 5047 + A - the matrix 5048 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5049 - row, col - index sets of rows and columns to extract (or NULL) 5050 5051 Output Parameter: 5052 . 
A_loc - the local sequential matrix generated 5053 5054 Level: developer 5055 5056 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5057 5058 @*/ 5059 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5060 { 5061 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5062 PetscErrorCode ierr; 5063 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5064 IS isrowa,iscola; 5065 Mat *aloc; 5066 PetscBool match; 5067 5068 PetscFunctionBegin; 5069 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5070 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5071 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5072 if (!row) { 5073 start = A->rmap->rstart; end = A->rmap->rend; 5074 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5075 } else { 5076 isrowa = *row; 5077 } 5078 if (!col) { 5079 start = A->cmap->rstart; 5080 cmap = a->garray; 5081 nzA = a->A->cmap->n; 5082 nzB = a->B->cmap->n; 5083 ierr = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr); 5084 ncols = 0; 5085 for (i=0; i<nzB; i++) { 5086 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5087 else break; 5088 } 5089 imark = i; 5090 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5091 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5092 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5093 } else { 5094 iscola = *col; 5095 } 5096 if (scall != MAT_INITIAL_MATRIX) { 5097 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5098 aloc[0] = *A_loc; 5099 } 5100 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5101 *A_loc = aloc[0]; 5102 ierr = PetscFree(aloc);CHKERRQ(ierr); 5103 if (!row) { 5104 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5105 } 5106 if (!col) { 5107 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5108 } 5109 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5110 
PetscFunctionReturn(0); 5111 } 5112 5113 #undef __FUNCT__ 5114 #define __FUNCT__ "MatGetBrowsOfAcols" 5115 /*@C 5116 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5117 5118 Collective on Mat 5119 5120 Input Parameters: 5121 + A,B - the matrices in mpiaij format 5122 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5123 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5124 5125 Output Parameter: 5126 + rowb, colb - index sets of rows and columns of B to extract 5127 - B_seq - the sequential matrix generated 5128 5129 Level: developer 5130 5131 @*/ 5132 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5133 { 5134 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5135 PetscErrorCode ierr; 5136 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5137 IS isrowb,iscolb; 5138 Mat *bseq=NULL; 5139 5140 PetscFunctionBegin; 5141 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5142 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5143 } 5144 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5145 5146 if (scall == MAT_INITIAL_MATRIX) { 5147 start = A->cmap->rstart; 5148 cmap = a->garray; 5149 nzA = a->A->cmap->n; 5150 nzB = a->B->cmap->n; 5151 ierr = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr); 5152 ncols = 0; 5153 for (i=0; i<nzB; i++) { /* row < local row index */ 5154 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5155 else break; 5156 } 5157 imark = i; 5158 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5159 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5160 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5161 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5162 } else { 5163 
if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5164 isrowb = *rowb; iscolb = *colb; 5165 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5166 bseq[0] = *B_seq; 5167 } 5168 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5169 *B_seq = bseq[0]; 5170 ierr = PetscFree(bseq);CHKERRQ(ierr); 5171 if (!rowb) { 5172 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5173 } else { 5174 *rowb = isrowb; 5175 } 5176 if (!colb) { 5177 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5178 } else { 5179 *colb = iscolb; 5180 } 5181 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5182 PetscFunctionReturn(0); 5183 } 5184 5185 #undef __FUNCT__ 5186 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5187 /* 5188 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5189 of the OFF-DIAGONAL portion of local A 5190 5191 Collective on Mat 5192 5193 Input Parameters: 5194 + A,B - the matrices in mpiaij format 5195 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5196 5197 Output Parameter: 5198 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5199 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5200 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5201 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5202 5203 Level: developer 5204 5205 */ 5206 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5207 { 5208 VecScatter_MPI_General *gen_to,*gen_from; 5209 PetscErrorCode ierr; 5210 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5211 Mat_SeqAIJ *b_oth; 5212 VecScatter ctx =a->Mvctx; 5213 MPI_Comm comm; 5214 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5215 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5216 PetscScalar *rvalues,*svalues; 5217 MatScalar *b_otha,*bufa,*bufA; 5218 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5219 MPI_Request *rwaits = NULL,*swaits = NULL; 5220 MPI_Status *sstatus,rstatus; 5221 PetscMPIInt jj; 5222 PetscInt *cols,sbs,rbs; 5223 PetscScalar *vals; 5224 5225 PetscFunctionBegin; 5226 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5227 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5228 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5229 } 5230 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5231 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5232 5233 gen_to = (VecScatter_MPI_General*)ctx->todata; 5234 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5235 rvalues = gen_from->values; /* holds the length of receiving row */ 5236 svalues = gen_to->values; /* holds the length of sending row */ 5237 nrecvs = gen_from->n; 5238 nsends = gen_to->n; 5239 5240 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5241 srow = gen_to->indices; /* local row index to be sent */ 5242 sstarts = gen_to->starts; 5243 sprocs = 
gen_to->procs; 5244 sstatus = gen_to->sstatus; 5245 sbs = gen_to->bs; 5246 rstarts = gen_from->starts; 5247 rprocs = gen_from->procs; 5248 rbs = gen_from->bs; 5249 5250 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5251 if (scall == MAT_INITIAL_MATRIX) { 5252 /* i-array */ 5253 /*---------*/ 5254 /* post receives */ 5255 for (i=0; i<nrecvs; i++) { 5256 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5257 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5258 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5259 } 5260 5261 /* pack the outgoing message */ 5262 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5263 5264 sstartsj[0] = 0; 5265 rstartsj[0] = 0; 5266 len = 0; /* total length of j or a array to be sent */ 5267 k = 0; 5268 for (i=0; i<nsends; i++) { 5269 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5270 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5271 for (j=0; j<nrows; j++) { 5272 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5273 for (l=0; l<sbs; l++) { 5274 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5275 5276 rowlen[j*sbs+l] = ncols; 5277 5278 len += ncols; 5279 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5280 } 5281 k++; 5282 } 5283 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5284 5285 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5286 } 5287 /* recvs and sends of i-array are completed */ 5288 i = nrecvs; 5289 while (i--) { 5290 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5291 } 5292 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5293 5294 /* allocate buffers for sending j and a arrays */ 5295 ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr); 5296 ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr); 5297 5298 /* create i-array of B_oth */ 5299 ierr = 
PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr); 5300 5301 b_othi[0] = 0; 5302 len = 0; /* total length of j or a array to be received */ 5303 k = 0; 5304 for (i=0; i<nrecvs; i++) { 5305 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5306 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5307 for (j=0; j<nrows; j++) { 5308 b_othi[k+1] = b_othi[k] + rowlen[j]; 5309 len += rowlen[j]; k++; 5310 } 5311 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5312 } 5313 5314 /* allocate space for j and a arrrays of B_oth */ 5315 ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr); 5316 ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr); 5317 5318 /* j-array */ 5319 /*---------*/ 5320 /* post receives of j-array */ 5321 for (i=0; i<nrecvs; i++) { 5322 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5323 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5324 } 5325 5326 /* pack the outgoing message j-array */ 5327 k = 0; 5328 for (i=0; i<nsends; i++) { 5329 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5330 bufJ = bufj+sstartsj[i]; 5331 for (j=0; j<nrows; j++) { 5332 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5333 for (ll=0; ll<sbs; ll++) { 5334 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5335 for (l=0; l<ncols; l++) { 5336 *bufJ++ = cols[l]; 5337 } 5338 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5339 } 5340 } 5341 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5342 } 5343 5344 /* recvs and sends of j-array are completed */ 5345 i = nrecvs; 5346 while (i--) { 5347 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5348 } 5349 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5350 } else if (scall == MAT_REUSE_MATRIX) { 5351 sstartsj = *startsj_s; 5352 rstartsj = *startsj_r; 
5353 bufa = *bufa_ptr; 5354 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5355 b_otha = b_oth->a; 5356 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5357 5358 /* a-array */ 5359 /*---------*/ 5360 /* post receives of a-array */ 5361 for (i=0; i<nrecvs; i++) { 5362 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5363 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5364 } 5365 5366 /* pack the outgoing message a-array */ 5367 k = 0; 5368 for (i=0; i<nsends; i++) { 5369 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5370 bufA = bufa+sstartsj[i]; 5371 for (j=0; j<nrows; j++) { 5372 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5373 for (ll=0; ll<sbs; ll++) { 5374 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5375 for (l=0; l<ncols; l++) { 5376 *bufA++ = vals[l]; 5377 } 5378 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5379 } 5380 } 5381 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5382 } 5383 /* recvs and sends of a-array are completed */ 5384 i = nrecvs; 5385 while (i--) { 5386 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5387 } 5388 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5389 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5390 5391 if (scall == MAT_INITIAL_MATRIX) { 5392 /* put together the new matrix */ 5393 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5394 5395 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5396 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5397 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5398 b_oth->free_a = PETSC_TRUE; 5399 b_oth->free_ij = PETSC_TRUE; 5400 b_oth->nonew = 0; 5401 5402 ierr = PetscFree(bufj);CHKERRQ(ierr); 5403 if (!startsj_s || !bufa_ptr) { 5404 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5405 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5406 } else { 5407 *startsj_s = sstartsj; 5408 *startsj_r = rstartsj; 5409 *bufa_ptr = bufa; 5410 } 5411 } 5412 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5413 PetscFunctionReturn(0); 5414 } 5415 5416 #undef __FUNCT__ 5417 #define __FUNCT__ "MatGetCommunicationStructs" 5418 /*@C 5419 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5420 5421 Not Collective 5422 5423 Input Parameters: 5424 . A - The matrix in mpiaij format 5425 5426 Output Parameter: 5427 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5428 . colmap - A map from global column index to local index into lvec 5429 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5430 5431 Level: developer 5432 5433 @*/ 5434 #if defined(PETSC_USE_CTABLE) 5435 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5436 #else 5437 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5438 #endif 5439 { 5440 Mat_MPIAIJ *a; 5441 5442 PetscFunctionBegin; 5443 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5444 PetscValidPointer(lvec, 2); 5445 PetscValidPointer(colmap, 3); 5446 PetscValidPointer(multScatter, 4); 5447 a = (Mat_MPIAIJ*) A->data; 5448 if (lvec) *lvec = a->lvec; 5449 if (colmap) *colmap = a->colmap; 5450 if (multScatter) *multScatter = a->Mvctx; 5451 PetscFunctionReturn(0); 5452 } 5453 5454 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5455 PETSC_EXTERN PetscErrorCode 
MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5456 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5457 5458 #undef __FUNCT__ 5459 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5460 /* 5461 Computes (B'*A')' since computing B*A directly is untenable 5462 5463 n p p 5464 ( ) ( ) ( ) 5465 m ( A ) * n ( B ) = m ( C ) 5466 ( ) ( ) ( ) 5467 5468 */ 5469 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5470 { 5471 PetscErrorCode ierr; 5472 Mat At,Bt,Ct; 5473 5474 PetscFunctionBegin; 5475 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5476 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5477 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5478 ierr = MatDestroy(&At);CHKERRQ(ierr); 5479 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5480 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5481 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5482 PetscFunctionReturn(0); 5483 } 5484 5485 #undef __FUNCT__ 5486 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5487 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5488 { 5489 PetscErrorCode ierr; 5490 PetscInt m=A->rmap->n,n=B->cmap->n; 5491 Mat Cmat; 5492 5493 PetscFunctionBegin; 5494 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5495 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5496 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5497 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5498 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5499 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5500 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5501 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5502 5503 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5504 5505 *C = Cmat; 5506 
PetscFunctionReturn(0); 5507 } 5508 5509 /* ----------------------------------------------------------------*/ 5510 #undef __FUNCT__ 5511 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5512 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5513 { 5514 PetscErrorCode ierr; 5515 5516 PetscFunctionBegin; 5517 if (scall == MAT_INITIAL_MATRIX) { 5518 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5519 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5520 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5521 } 5522 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5523 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5524 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5525 PetscFunctionReturn(0); 5526 } 5527 5528 #if defined(PETSC_HAVE_MUMPS) 5529 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5530 #endif 5531 #if defined(PETSC_HAVE_PASTIX) 5532 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5533 #endif 5534 #if defined(PETSC_HAVE_SUPERLU_DIST) 5535 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5536 #endif 5537 #if defined(PETSC_HAVE_CLIQUE) 5538 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5539 #endif 5540 5541 /*MC 5542 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5543 5544 Options Database Keys: 5545 . 
-mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5546 5547 Level: beginner 5548 5549 .seealso: MatCreateAIJ() 5550 M*/ 5551 5552 #undef __FUNCT__ 5553 #define __FUNCT__ "MatCreate_MPIAIJ" 5554 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5555 { 5556 Mat_MPIAIJ *b; 5557 PetscErrorCode ierr; 5558 PetscMPIInt size; 5559 5560 PetscFunctionBegin; 5561 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5562 5563 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5564 B->data = (void*)b; 5565 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5566 B->assembled = PETSC_FALSE; 5567 B->insertmode = NOT_SET_VALUES; 5568 b->size = size; 5569 5570 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5571 5572 /* build cache for off array entries formed */ 5573 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5574 5575 b->donotstash = PETSC_FALSE; 5576 b->colmap = 0; 5577 b->garray = 0; 5578 b->roworiented = PETSC_TRUE; 5579 5580 /* stuff used for matrix vector multiply */ 5581 b->lvec = NULL; 5582 b->Mvctx = NULL; 5583 5584 /* stuff for MatGetRow() */ 5585 b->rowindices = 0; 5586 b->rowvalues = 0; 5587 b->getrowactive = PETSC_FALSE; 5588 5589 /* flexible pointer used in CUSP/CUSPARSE classes */ 5590 b->spptr = NULL; 5591 5592 #if defined(PETSC_HAVE_MUMPS) 5593 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr); 5594 #endif 5595 #if defined(PETSC_HAVE_PASTIX) 5596 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5597 #endif 5598 #if defined(PETSC_HAVE_SUPERLU_DIST) 5599 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5600 #endif 5601 #if defined(PETSC_HAVE_CLIQUE) 5602 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr); 5603 #endif 5604 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5605 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5606 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5607 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5608 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5609 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5610 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5611 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5612 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5613 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5614 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5615 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5616 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5617 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5618 PetscFunctionReturn(0); 5619 } 5620 5621 #undef __FUNCT__ 5622 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5623 /*@ 5624 
MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
         and "off-diagonal" part of the matrix in CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices for "diagonal" portion of matrix
.  j - column indices
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of matrix
.  oj - column indices
-  oa - matrix values

   Output Parameter:
.   mat - the matrix

   Level: advanced

   Notes:
       The i, j, a, oi, oj, and oa arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
       must free the arrays once the matrix has been destroyed and not before.

       The i and j indices are 0 based

       See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix

       This sets local rows and cannot be used to set off-processor values.

       Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
       legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
       not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
       the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
       keep track of the underlying array.
Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5664 communication if it is known that only local entries will be set. 5665 5666 .keywords: matrix, aij, compressed row, sparse, parallel 5667 5668 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5669 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5670 @*/ 5671 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5672 { 5673 PetscErrorCode ierr; 5674 Mat_MPIAIJ *maij; 5675 5676 PetscFunctionBegin; 5677 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5678 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5679 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5680 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5681 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5682 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5683 maij = (Mat_MPIAIJ*) (*mat)->data; 5684 5685 (*mat)->preallocated = PETSC_TRUE; 5686 5687 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5688 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5689 5690 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5691 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5692 5693 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5694 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5695 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5696 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5697 5698 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5699 ierr = 
MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5700 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5701 PetscFunctionReturn(0); 5702 } 5703 5704 /* 5705 Special version for direct calls from Fortran 5706 */ 5707 #include <petsc-private/fortranimpl.h> 5708 5709 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5710 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5711 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5712 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5713 #endif 5714 5715 /* Change these macros so can be used in void function */ 5716 #undef CHKERRQ 5717 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5718 #undef SETERRQ2 5719 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5720 #undef SETERRQ3 5721 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5722 #undef SETERRQ 5723 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5724 5725 #undef __FUNCT__ 5726 #define __FUNCT__ "matsetvaluesmpiaij_" 5727 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5728 { 5729 Mat mat = *mmat; 5730 PetscInt m = *mm, n = *mn; 5731 InsertMode addv = *maddv; 5732 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5733 PetscScalar value; 5734 PetscErrorCode ierr; 5735 5736 MatCheckPreallocated(mat,1); 5737 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5738 5739 #if defined(PETSC_USE_DEBUG) 5740 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5741 #endif 5742 { 5743 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5744 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5745 PetscBool roworiented = aij->roworiented; 5746 5747 /* Some Variables required in the macro */ 5748 Mat A = aij->A; 5749 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5750 PetscInt *aimax = a->imax,*ai = 
a->i,*ailen = a->ilen,*aj = a->j; 5751 MatScalar *aa = a->a; 5752 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5753 Mat B = aij->B; 5754 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5755 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5756 MatScalar *ba = b->a; 5757 5758 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5759 PetscInt nonew = a->nonew; 5760 MatScalar *ap1,*ap2; 5761 5762 PetscFunctionBegin; 5763 for (i=0; i<m; i++) { 5764 if (im[i] < 0) continue; 5765 #if defined(PETSC_USE_DEBUG) 5766 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5767 #endif 5768 if (im[i] >= rstart && im[i] < rend) { 5769 row = im[i] - rstart; 5770 lastcol1 = -1; 5771 rp1 = aj + ai[row]; 5772 ap1 = aa + ai[row]; 5773 rmax1 = aimax[row]; 5774 nrow1 = ailen[row]; 5775 low1 = 0; 5776 high1 = nrow1; 5777 lastcol2 = -1; 5778 rp2 = bj + bi[row]; 5779 ap2 = ba + bi[row]; 5780 rmax2 = bimax[row]; 5781 nrow2 = bilen[row]; 5782 low2 = 0; 5783 high2 = nrow2; 5784 5785 for (j=0; j<n; j++) { 5786 if (roworiented) value = v[i*n+j]; 5787 else value = v[i+j*m]; 5788 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5789 if (in[j] >= cstart && in[j] < cend) { 5790 col = in[j] - cstart; 5791 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5792 } else if (in[j] < 0) continue; 5793 #if defined(PETSC_USE_DEBUG) 5794 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5795 #endif 5796 else { 5797 if (mat->was_assembled) { 5798 if (!aij->colmap) { 5799 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5800 } 5801 #if defined(PETSC_USE_CTABLE) 5802 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5803 col--; 5804 #else 5805 col = 
aij->colmap[in[j]] - 1; 5806 #endif 5807 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5808 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5809 col = in[j]; 5810 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5811 B = aij->B; 5812 b = (Mat_SeqAIJ*)B->data; 5813 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5814 rp2 = bj + bi[row]; 5815 ap2 = ba + bi[row]; 5816 rmax2 = bimax[row]; 5817 nrow2 = bilen[row]; 5818 low2 = 0; 5819 high2 = nrow2; 5820 bm = aij->B->rmap->n; 5821 ba = b->a; 5822 } 5823 } else col = in[j]; 5824 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5825 } 5826 } 5827 } else if (!aij->donotstash) { 5828 if (roworiented) { 5829 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5830 } else { 5831 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5832 } 5833 } 5834 } 5835 } 5836 PetscFunctionReturnVoid(); 5837 } 5838 5839