1 2 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 3 #include <petsc-private/vecimpl.h> 4 #include <petscblaslapack.h> 5 #include <petscsf.h> 6 7 /*MC 8 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 9 10 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 11 and MATMPIAIJ otherwise. As a result, for single process communicators, 12 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 13 for communicators controlling multiple processes. It is recommended that you call both of 14 the above preallocation routines for simplicity. 15 16 Options Database Keys: 17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 18 19 Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when 20 enough exist. 21 22 Level: beginner 23 24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ 25 M*/ 26 27 /*MC 28 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 29 30 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 31 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 32 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 33 for communicators controlling multiple processes. It is recommended that you call both of 34 the above preallocation routines for simplicity. 35 36 Options Database Keys: 37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 38 39 Level: beginner 40 41 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 42 M*/ 43 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ" 46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 47 { 48 PetscErrorCode ierr; 49 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 50 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 51 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 52 const PetscInt *ia,*ib; 53 const MatScalar *aa,*bb; 54 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 55 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 56 57 PetscFunctionBegin; 58 *keptrows = 0; 59 ia = a->i; 60 ib = b->i; 61 for (i=0; i<m; i++) { 62 na = ia[i+1] - ia[i]; 63 nb = ib[i+1] - ib[i]; 64 if (!na && !nb) { 65 cnt++; 66 goto ok1; 67 } 68 aa = a->a + ia[i]; 69 for (j=0; j<na; j++) { 70 if (aa[j] != 0.0) goto ok1; 71 } 72 bb = b->a + ib[i]; 73 for (j=0; j <nb; j++) { 74 if (bb[j] != 0.0) goto ok1; 75 } 76 cnt++; 77 ok1:; 78 } 79 ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 80 if (!n0rows) PetscFunctionReturn(0); 81 ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 82 cnt = 0; 83 for (i=0; i<m; i++) { 84 na = ia[i+1] - ia[i]; 85 nb = ib[i+1] - ib[i]; 86 if (!na && !nb) continue; 87 aa = a->a + ia[i]; 88 for (j=0; j<na;j++) { 89 if (aa[j] != 0.0) { 90 rows[cnt++] = rstart + i; 91 goto ok2; 92 } 93 } 94 bb = b->a + ib[i]; 95 for (j=0; j<nb; j++) { 96 if (bb[j] != 0.0) { 97 rows[cnt++] = rstart + i; 98 goto ok2; 99 } 100 } 101 ok2:; 102 } 103 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 104 PetscFunctionReturn(0); 105 } 106 107 #undef __FUNCT__ 108 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ" 109 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 110 { 111 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 112 PetscErrorCode ierr; 113 PetscInt i,rstart,nrows,*rows; 114 115 PetscFunctionBegin; 116 *zrows = NULL; 117 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 118 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 119 for (i=0; i<nrows; i++) rows[i] += rstart; 120 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 121 PetscFunctionReturn(0); 122 } 123 124 #undef __FUNCT__ 125 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 126 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 127 { 128 PetscErrorCode ierr; 129 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 130 PetscInt i,n,*garray = aij->garray; 131 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 132 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 133 PetscReal *work; 134 135 PetscFunctionBegin; 136 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 137 ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr); 138 ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr); 139 if (type == NORM_2) { 140 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 141 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 142 } 143 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 144 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 145 } 146 } else if (type == NORM_1) { 147 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 148 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 149 } 150 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 151 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 152 } 153 } else if (type == NORM_INFINITY) { 154 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 155 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 156 } 157 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 158 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 159 } 160 161 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 162 if (type == NORM_INFINITY) { 163 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 164 } else { 165 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 166 } 167 ierr = PetscFree(work);CHKERRQ(ierr); 168 if (type == NORM_2) { 169 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 170 } 171 PetscFunctionReturn(0); 172 } 173 174 #undef __FUNCT__ 175 #define __FUNCT__ "MatDistribute_MPIAIJ" 176 /* 177 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 178 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 179 180 Only for square matrices 181 182 Used by a preconditioner, hence PETSC_EXTERN 183 */ 184 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 185 { 186 PetscMPIInt rank,size; 187 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2]; 188 PetscErrorCode ierr; 189 Mat mat; 190 Mat_SeqAIJ *gmata; 191 PetscMPIInt tag; 192 MPI_Status status; 193 PetscBool aij; 194 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 195 196 PetscFunctionBegin; 197 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 198 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 199 if (!rank) { 200 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 201 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 202 } 203 if (reuse == MAT_INITIAL_MATRIX) { 204 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 205 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 206 if (!rank) { 207 bses[0] = gmat->rmap->bs; 208 bses[1] = gmat->cmap->bs; 209 } 210 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 211 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 212 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 213 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 214 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 215 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 216 217 rowners[0] = 0; 218 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 219 rstart = rowners[rank]; 220 rend = rowners[rank+1]; 221 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 222 if (!rank) { 223 gmata = (Mat_SeqAIJ*) gmat->data; 224 /* send row lengths to all processors */ 225 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 226 for (i=1; i<size; i++) { 227 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 228 } 229 /* determine number diagonal and off-diagonal counts */ 230 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 231 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 232 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 233 jj = 0; 234 for (i=0; i<m; i++) { 235 for (j=0; j<dlens[i]; j++) { 236 if (gmata->j[jj] < rstart) ld[i]++; 237 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 238 jj++; 239 } 240 } 241 /* send column indices to other processes */ 242 for (i=1; i<size; i++) { 243 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 244 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 245 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 246 } 247 248 /* send numerical values to other processes */ 249 for (i=1; i<size; i++) { 250 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 251 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 252 } 253 gmataa = gmata->a; 254 gmataj = gmata->j; 255 256 } else { 257 /* receive row lengths */ 258 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 259 /* receive column indices */ 260 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 261 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 262 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 263 /* determine number diagonal and off-diagonal counts */ 264 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 265 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 266 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 267 jj = 0; 268 for (i=0; i<m; i++) { 269 for (j=0; j<dlens[i]; j++) { 270 if (gmataj[jj] < rstart) ld[i]++; 271 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 272 jj++; 273 } 274 } 275 /* receive numerical values */ 276 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 277 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 278 } 279 /* set preallocation */ 280 for (i=0; i<m; i++) { 281 dlens[i] -= olens[i]; 282 } 283 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 284 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 285 286 for (i=0; i<m; i++) { 287 dlens[i] += olens[i]; 288 } 289 cnt = 0; 290 for (i=0; i<m; i++) { 291 row = rstart + i; 292 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 293 cnt += dlens[i]; 294 } 295 if (rank) { 296 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 297 } 298 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 299 ierr = PetscFree(rowners);CHKERRQ(ierr); 300 301 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 302 303 *inmat = mat; 304 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 305 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 306 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 307 mat = *inmat; 308 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 309 if (!rank) { 310 /* send numerical values to other processes */ 311 gmata = (Mat_SeqAIJ*) gmat->data; 312 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 313 gmataa = gmata->a; 314 for (i=1; i<size; i++) { 315 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 316 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 317 } 318 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 319 } else { 320 /* receive numerical values from process 0*/ 321 nz = Ad->nz + Ao->nz; 322 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 323 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 324 } 325 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 326 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 327 ad = Ad->a; 328 ao = Ao->a; 329 if (mat->rmap->n) { 330 i = 0; 331 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 332 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 333 } 334 for (i=1; i<mat->rmap->n; i++) { 335 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 336 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 337 } 338 i--; 339 if (mat->rmap->n) { 340 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 341 } 342 if (rank) { 343 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 344 } 345 } 346 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 347 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 348 PetscFunctionReturn(0); 349 } 350 351 /* 352 Local utility routine that creates a mapping from the global column 353 number to the local number in the off-diagonal part of the local 354 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 355 a slightly higher hash table cost; without it it is not scalable (each processor 356 has an order N integer array but is fast to acess. 357 */ 358 #undef __FUNCT__ 359 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 360 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 361 { 362 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 363 PetscErrorCode ierr; 364 PetscInt n = aij->B->cmap->n,i; 365 366 PetscFunctionBegin; 367 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 368 #if defined(PETSC_USE_CTABLE) 369 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 370 for (i=0; i<n; i++) { 371 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 372 } 373 #else 374 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 375 ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 376 ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 377 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 378 #endif 379 PetscFunctionReturn(0); 380 } 381 382 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 383 { \ 384 if (col <= lastcol1) low1 = 0; \ 385 else high1 = nrow1; \ 386 lastcol1 = col;\ 387 while (high1-low1 > 5) { \ 388 t = (low1+high1)/2; \ 389 if (rp1[t] > col) high1 = t; \ 390 else low1 = t; \ 391 } \ 392 for (_i=low1; _i<high1; _i++) { \ 393 if (rp1[_i] > col) break; \ 394 if (rp1[_i] == col) { \ 395 if (addv == ADD_VALUES) ap1[_i] += value; \ 396 else ap1[_i] = value; \ 397 goto a_noinsert; \ 398 } \ 399 } \ 400 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 401 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 402 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 403 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 404 N = nrow1++ - 1; a->nz++; high1++; \ 405 /* shift up all the later entries in this row */ \ 406 for (ii=N; ii>=_i; ii--) { \ 407 rp1[ii+1] = rp1[ii]; \ 408 ap1[ii+1] = ap1[ii]; \ 409 } \ 410 rp1[_i] = col; \ 411 ap1[_i] = value; \ 412 a_noinsert: ; \ 413 ailen[row] = nrow1; \ 414 } 415 416 417 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 418 { \ 419 if (col <= lastcol2) low2 = 0; \ 420 else high2 = nrow2; \ 421 lastcol2 = col; \ 422 while (high2-low2 > 5) { \ 423 t = (low2+high2)/2; \ 424 if (rp2[t] > col) high2 = t; \ 425 else low2 = t; \ 426 } \ 427 for (_i=low2; _i<high2; _i++) { \ 428 if (rp2[_i] > col) break; \ 429 if (rp2[_i] == col) { \ 430 if (addv == ADD_VALUES) ap2[_i] += value; \ 431 else ap2[_i] = value; \ 432 goto b_noinsert; \ 433 } \ 434 } \ 435 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 436 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 437 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 438 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 439 N = nrow2++ - 1; b->nz++; high2++; \ 440 /* shift up all the later entries in this row */ \ 441 for (ii=N; ii>=_i; ii--) { \ 442 rp2[ii+1] = rp2[ii]; \ 443 ap2[ii+1] = ap2[ii]; \ 444 } \ 445 rp2[_i] = col; \ 446 ap2[_i] = value; \ 447 b_noinsert: ; \ 448 bilen[row] = nrow2; \ 449 } 450 451 #undef __FUNCT__ 452 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 453 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 454 { 455 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 456 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 457 PetscErrorCode ierr; 458 PetscInt l,*garray = mat->garray,diag; 459 460 PetscFunctionBegin; 461 /* code only works for square matrices A */ 462 463 /* find size of row to the left of the diagonal part */ 464 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 465 row = row - diag; 466 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 467 if (garray[b->j[b->i[row]+l]] > diag) break; 468 } 469 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 470 471 /* diagonal part */ 472 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 473 474 /* right of diagonal part */ 475 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 476 PetscFunctionReturn(0); 477 } 478 479 #undef __FUNCT__ 480 #define __FUNCT__ "MatSetValues_MPIAIJ" 481 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 482 { 483 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 484 PetscScalar value; 485 PetscErrorCode ierr; 486 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 487 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 488 PetscBool roworiented = aij->roworiented; 489 490 /* Some Variables required in the macro */ 491 Mat A = aij->A; 492 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 493 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 494 MatScalar *aa = a->a; 495 PetscBool ignorezeroentries = a->ignorezeroentries; 496 Mat B = aij->B; 497 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 498 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 499 MatScalar *ba = b->a; 500 501 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 502 PetscInt nonew; 503 MatScalar *ap1,*ap2; 504 505 PetscFunctionBegin; 506 if (v) PetscValidScalarPointer(v,6); 507 for (i=0; i<m; i++) { 508 if (im[i] < 0) continue; 509 #if defined(PETSC_USE_DEBUG) 510 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 511 #endif 512 if (im[i] >= rstart && im[i] < rend) { 513 row = im[i] - rstart; 514 lastcol1 = -1; 515 rp1 = aj + ai[row]; 516 ap1 = aa + ai[row]; 517 rmax1 = aimax[row]; 518 nrow1 = ailen[row]; 519 low1 = 0; 520 high1 = nrow1; 521 lastcol2 = -1; 522 rp2 = bj + bi[row]; 523 ap2 = ba + bi[row]; 524 rmax2 = bimax[row]; 525 nrow2 = bilen[row]; 526 low2 = 0; 527 high2 = nrow2; 528 529 for (j=0; j<n; j++) { 530 if (v) { 531 if (roworiented) value = v[i*n+j]; 532 else value = v[i+j*m]; 533 } else value = 0.0; 534 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 535 if (in[j] >= cstart && in[j] < cend) { 536 col = in[j] - cstart; 537 nonew = a->nonew; 538 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 539 } else if (in[j] < 0) continue; 540 #if defined(PETSC_USE_DEBUG) 541 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 542 #endif 543 else { 544 if (mat->was_assembled) { 545 if (!aij->colmap) { 546 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 547 } 548 #if defined(PETSC_USE_CTABLE) 549 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 550 col--; 551 #else 552 col = aij->colmap[in[j]] - 1; 553 #endif 554 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 555 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 556 col = in[j]; 557 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 558 B = aij->B; 559 b = (Mat_SeqAIJ*)B->data; 560 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 561 rp2 = bj + bi[row]; 562 ap2 = ba + bi[row]; 563 rmax2 = bimax[row]; 564 nrow2 = bilen[row]; 565 low2 = 0; 566 high2 = nrow2; 567 bm = aij->B->rmap->n; 568 ba = b->a; 569 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 570 } else col = in[j]; 571 nonew = b->nonew; 572 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 573 } 574 } 575 } else { 576 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 577 if (!aij->donotstash) { 578 mat->assembled = PETSC_FALSE; 579 if (roworiented) { 580 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 581 } else { 582 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 583 } 584 } 585 } 586 } 587 PetscFunctionReturn(0); 588 } 589 590 #undef __FUNCT__ 591 #define __FUNCT__ "MatGetValues_MPIAIJ" 592 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 593 { 594 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 595 PetscErrorCode ierr; 596 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 597 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 598 599 PetscFunctionBegin; 600 for (i=0; i<m; i++) { 601 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 602 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 603 if (idxm[i] >= rstart && idxm[i] < rend) { 604 row = idxm[i] - rstart; 605 for (j=0; j<n; j++) { 606 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 607 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 608 if (idxn[j] >= cstart && idxn[j] < cend) { 609 col = idxn[j] - cstart; 610 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 611 } else { 612 if (!aij->colmap) { 613 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 614 } 615 #if defined(PETSC_USE_CTABLE) 616 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 617 col--; 618 #else 619 col = aij->colmap[idxn[j]] - 1; 620 #endif 621 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 622 else { 623 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 624 } 625 } 626 } 627 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 628 } 629 PetscFunctionReturn(0); 630 } 631 632 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 633 634 #undef __FUNCT__ 635 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 636 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 637 { 638 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 639 PetscErrorCode ierr; 640 PetscInt nstash,reallocs; 641 InsertMode addv; 642 643 PetscFunctionBegin; 644 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 645 646 /* make sure all processors are either in INSERTMODE or ADDMODE */ 647 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 648 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 649 mat->insertmode = addv; /* in case this processor had no cache */ 650 651 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 652 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 653 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 654 PetscFunctionReturn(0); 655 } 656 657 #undef __FUNCT__ 658 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 659 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 660 { 661 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 662 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 663 PetscErrorCode ierr; 664 PetscMPIInt n; 665 PetscInt i,j,rstart,ncols,flg; 666 PetscInt *row,*col; 667 PetscBool other_disassembled; 668 PetscScalar *val; 669 InsertMode addv = mat->insertmode; 670 671 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 672 673 PetscFunctionBegin; 674 if (!aij->donotstash && !mat->nooffprocentries) { 675 while (1) { 676 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 677 if (!flg) break; 678 679 for (i=0; i<n; ) { 680 /* Now identify the consecutive vals belonging to the same row */ 681 for (j=i,rstart=row[j]; j<n; j++) { 682 if (row[j] != rstart) break; 683 } 684 if (j < n) ncols = j-i; 685 else ncols = n-i; 686 /* Now assemble all these values with a single function call */ 687 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 688 689 i = j; 690 } 691 } 692 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 693 } 694 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 695 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 696 697 /* determine if any processor has disassembled, if so we must 698 also disassemble ourselfs, in order that we may reassemble. */ 699 /* 700 if nonzero structure of submatrix B cannot change then we know that 701 no processor disassembled thus we can skip this stuff 702 */ 703 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 704 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 705 if (mat->was_assembled && !other_disassembled) { 706 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 707 } 708 } 709 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 710 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 711 } 712 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 713 ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 714 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 715 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 716 717 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 718 719 aij->rowvalues = 0; 720 721 /* used by MatAXPY() */ 722 a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 723 a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 724 725 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 726 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 727 PetscFunctionReturn(0); 728 } 729 730 #undef __FUNCT__ 731 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 732 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 733 { 734 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 735 PetscErrorCode ierr; 736 737 PetscFunctionBegin; 738 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 739 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 740 PetscFunctionReturn(0); 741 } 742 743 #undef __FUNCT__ 744 #define __FUNCT__ "MatZeroRows_MPIAIJ" 745 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 746 { 747 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 748 PetscErrorCode ierr; 749 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 750 PetscInt i,*owners = A->rmap->range; 751 PetscInt *nprocs,j,idx,nsends,row; 752 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 753 PetscInt *rvalues,count,base,slen,*source; 754 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 755 MPI_Comm comm; 756 MPI_Request *send_waits,*recv_waits; 757 MPI_Status recv_status,*send_status; 758 const PetscScalar *xx; 759 PetscScalar *bb; 760 #if defined(PETSC_DEBUG) 761 PetscBool found = PETSC_FALSE; 762 #endif 763 764 PetscFunctionBegin; 765 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 766 /* first count number of contributors to each processor */ 767 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 768 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 769 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 770 j = 0; 771 for (i=0; i<N; i++) { 772 if (lastidx > (idx = rows[i])) j = 0; 773 lastidx = idx; 774 for (; j<size; j++) { 775 if (idx >= owners[j] && idx < owners[j+1]) { 776 nprocs[2*j]++; 777 nprocs[2*j+1] = 1; 778 owner[i] = j; 779 #if defined(PETSC_DEBUG) 780 found = PETSC_TRUE; 781 #endif 782 break; 783 } 784 } 785 #if defined(PETSC_DEBUG) 786 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 787 found = PETSC_FALSE; 788 #endif 789 } 790 nsends = 0; 791 for (i=0; i<size; i++) nsends += nprocs[2*i+1]; 792 793 if (A->nooffproczerorows) { 794 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 795 nrecvs = nsends; 796 nmax = N; 797 } else { 798 /* inform other processors of number of messages and max length*/ 799 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 800 } 801 802 /* post receives: */ 803 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 804 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 805 for (i=0; i<nrecvs; i++) { 806 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 807 } 808 809 /* do sends: 810 1) starts[i] gives the starting index in svalues for stuff going to 811 the ith processor 812 */ 813 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 814 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 815 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 816 817 starts[0] = 0; 818 for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 819 for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i]; 820 821 starts[0] = 0; 822 for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 823 count = 0; 824 for (i=0; i<size; i++) { 825 if (nprocs[2*i+1]) { 826 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 827 } 828 } 829 ierr = PetscFree(starts);CHKERRQ(ierr); 830 831 base = owners[rank]; 832 833 /* wait on receives */ 834 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 835 count = nrecvs; slen = 0; 836 while (count) { 837 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 838 /* unpack receives into our local space */ 839 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 840 841 source[imdex] = recv_status.MPI_SOURCE; 842 lens[imdex] = n; 843 slen += n; 844 count--; 845 } 846 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 847 848 /* move the data into the send scatter */ 849 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 850 count = 0; 851 for (i=0; i<nrecvs; i++) { 852 values = rvalues + i*nmax; 853 for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base; 854 } 855 ierr = PetscFree(rvalues);CHKERRQ(ierr); 856 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 857 ierr = PetscFree(owner);CHKERRQ(ierr); 858 ierr = PetscFree(nprocs);CHKERRQ(ierr); 859 860 /* fix right hand side if needed */ 861 if (x && b) { 862 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 863 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 864 for (i=0; i<slen; i++) bb[lrows[i]] = diag*xx[lrows[i]]; 865 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 866 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 867 } 868 /* 869 Zero the required rows. If the "diagonal block" of the matrix 870 is square and the user wishes to set the diagonal we use separate 871 code so that MatSetValues() is not called for each diagonal allocating 872 new memory, thus calling lots of mallocs and slowing things down. 873 874 */ 875 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 876 ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 877 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 878 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 879 } else if (diag != 0.0) { 880 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 881 if (((Mat_SeqAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 882 for (i = 0; i < slen; i++) { 883 row = lrows[i] + rstart; 884 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 885 } 886 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 887 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 888 } else { 889 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 890 } 891 ierr = PetscFree(lrows);CHKERRQ(ierr); 892 893 /* wait on sends */ 894 if (nsends) { 895 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 896 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 897 ierr = PetscFree(send_status);CHKERRQ(ierr); 898 } 899 ierr = PetscFree(send_waits);CHKERRQ(ierr); 900 ierr = PetscFree(svalues);CHKERRQ(ierr); 901 PetscFunctionReturn(0); 902 } 903 904 #undef __FUNCT__ 905 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 906 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 907 { 908 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 909 PetscErrorCode ierr; 910 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 911 PetscInt i,*owners = A->rmap->range; 912 PetscInt *nprocs,j,idx,nsends; 913 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 914 PetscInt *rvalues,count,base,slen,*source; 915 PetscInt *lens,*lrows,*values,m; 916 MPI_Comm comm; 917 MPI_Request *send_waits,*recv_waits; 918 MPI_Status recv_status,*send_status; 919 const PetscScalar *xx; 920 PetscScalar *bb,*mask; 921 Vec xmask,lmask; 922 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 923 const PetscInt *aj, *ii,*ridx; 924 PetscScalar *aa; 925 #if defined(PETSC_DEBUG) 926 PetscBool found = PETSC_FALSE; 927 #endif 928 929 PetscFunctionBegin; 930 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 931 /* first count number of contributors to each processor */ 932 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 933 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 934 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 935 j = 0; 936 for (i=0; i<N; i++) { 937 if (lastidx > (idx = rows[i])) j = 0; 938 lastidx = idx; 939 for (; j<size; j++) { 940 if (idx >= owners[j] && idx < owners[j+1]) { 941 nprocs[2*j]++; 942 nprocs[2*j+1] = 1; 943 owner[i] = j; 944 #if defined(PETSC_DEBUG) 945 found = PETSC_TRUE; 946 #endif 947 break; 948 } 949 } 950 #if defined(PETSC_DEBUG) 951 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 952 found = PETSC_FALSE; 953 #endif 954 } 955 nsends = 0; for (i=0; i<size; i++) nsends += nprocs[2*i+1]; 956 957 /* inform other processors of number of messages and max length*/ 958 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 959 960 /* post receives: */ 961 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 962 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 963 for (i=0; i<nrecvs; i++) { 964 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 965 } 966 967 /* do sends: 968 1) starts[i] gives the starting index in svalues for stuff going to 969 the ith processor 970 */ 971 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 972 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 973 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 974 975 starts[0] = 0; 976 for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 977 for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i]; 978 979 starts[0] = 0; 980 for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 981 count = 0; 982 for (i=0; i<size; i++) { 983 if (nprocs[2*i+1]) { 984 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 985 } 986 } 987 ierr = PetscFree(starts);CHKERRQ(ierr); 988 989 base = owners[rank]; 990 991 /* wait on receives */ 992 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 993 count = nrecvs; slen = 0; 994 while (count) { 995 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 996 /* unpack receives into our local space */ 997 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 998 999 source[imdex] = recv_status.MPI_SOURCE; 1000 lens[imdex] = n; 1001 slen += n; 1002 count--; 1003 } 1004 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1005 1006 /* move the data into the send scatter */ 1007 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1008 count = 0; 1009 for (i=0; i<nrecvs; i++) { 1010 values = rvalues + i*nmax; 1011 for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base; 1012 } 1013 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1014 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1015 ierr = PetscFree(owner);CHKERRQ(ierr); 1016 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1017 /* lrows are the local rows to be zeroed, slen is the number of local rows */ 1018 1019 /* zero diagonal part of matrix */ 1020 ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr); 1021 1022 /* handle off diagonal part of matrix */ 1023 ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr); 1024 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1025 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1026 for (i=0; i<slen; i++) bb[lrows[i]] = 1; 1027 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1028 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1029 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1030 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1031 if (x) { 1032 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1033 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1034 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1035 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1036 } 1037 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1038 1039 /* remove zeroed rows of off diagonal matrix */ 1040 ii = aij->i; 1041 for (i=0; i<slen; i++) { 1042 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 1043 } 1044 1045 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1046 if (aij->compressedrow.use) { 1047 m = aij->compressedrow.nrows; 1048 ii = aij->compressedrow.i; 1049 ridx = aij->compressedrow.rindex; 1050 for (i=0; i<m; i++) { 1051 n = ii[i+1] - ii[i]; 1052 aj = aij->j + ii[i]; 1053 aa = aij->a + ii[i]; 1054 1055 for (j=0; j<n; j++) { 1056 if (PetscAbsScalar(mask[*aj])) { 1057 if (b) bb[*ridx] -= *aa*xx[*aj]; 1058 *aa = 0.0; 1059 } 1060 aa++; 1061 aj++; 1062 } 1063 ridx++; 1064 } 1065 } else { /* do not use compressed row format */ 1066 m = l->B->rmap->n; 1067 for (i=0; i<m; i++) { 1068 n = ii[i+1] - ii[i]; 1069 aj = aij->j + ii[i]; 1070 aa = aij->a + ii[i]; 1071 for (j=0; j<n; j++) { 1072 if (PetscAbsScalar(mask[*aj])) { 1073 if (b) bb[i] -= *aa*xx[*aj]; 1074 *aa = 0.0; 1075 } 1076 aa++; 1077 aj++; 1078 } 1079 } 1080 } 1081 if (x) { 1082 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1083 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1084 } 1085 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1086 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1087 ierr = PetscFree(lrows);CHKERRQ(ierr); 1088 1089 /* wait on sends */ 1090 if (nsends) { 1091 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1092 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1093 ierr = PetscFree(send_status);CHKERRQ(ierr); 1094 } 1095 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1096 ierr = PetscFree(svalues);CHKERRQ(ierr); 1097 PetscFunctionReturn(0); 1098 } 1099 1100 #undef __FUNCT__ 1101 #define __FUNCT__ "MatMult_MPIAIJ" 1102 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1103 { 1104 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1105 PetscErrorCode ierr; 1106 PetscInt nt; 1107 1108 PetscFunctionBegin; 1109 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1110 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1111 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1112 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1113 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1114 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1115 PetscFunctionReturn(0); 1116 } 1117 1118 #undef __FUNCT__ 1119 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 1120 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1121 { 1122 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1123 PetscErrorCode ierr; 1124 1125 PetscFunctionBegin; 1126 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1127 PetscFunctionReturn(0); 1128 } 1129 1130 #undef __FUNCT__ 1131 #define __FUNCT__ "MatMultAdd_MPIAIJ" 1132 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1133 { 1134 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1135 PetscErrorCode ierr; 1136 1137 PetscFunctionBegin; 1138 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1139 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1140 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1141 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1142 PetscFunctionReturn(0); 1143 } 1144 1145 #undef __FUNCT__ 1146 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1147 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1148 { 1149 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1150 PetscErrorCode ierr; 1151 PetscBool merged; 1152 1153 PetscFunctionBegin; 1154 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1155 /* do nondiagonal part */ 1156 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1157 if (!merged) { 1158 /* send it on its way */ 1159 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1160 /* do local part */ 1161 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1162 /* receive remote parts: note this assumes the values are not actually */ 1163 /* added in yy until the next line, */ 1164 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1165 } else { 1166 /* do local part */ 1167 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1168 /* send it on its way */ 1169 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1170 /* values actually were received in the Begin() but we need to call this nop */ 1171 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1172 } 1173 PetscFunctionReturn(0); 1174 } 1175 1176 #undef __FUNCT__ 1177 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1178 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1179 { 1180 MPI_Comm comm; 1181 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1182 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1183 IS Me,Notme; 1184 PetscErrorCode ierr; 1185 PetscInt M,N,first,last,*notme,i; 1186 PetscMPIInt size; 1187 1188 PetscFunctionBegin; 1189 /* Easy test: symmetric diagonal block */ 1190 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1191 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1192 if (!*f) PetscFunctionReturn(0); 1193 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1194 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1195 if (size == 1) PetscFunctionReturn(0); 1196 1197 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 1198 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1199 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1200 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 1201 for (i=0; i<first; i++) notme[i] = i; 1202 for (i=last; i<M; i++) notme[i-last+first] = i; 1203 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1204 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1205 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1206 Aoff = Aoffs[0]; 1207 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1208 Boff = Boffs[0]; 1209 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1210 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1211 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1212 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1213 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1214 ierr = PetscFree(notme);CHKERRQ(ierr); 1215 PetscFunctionReturn(0); 1216 } 1217 1218 #undef __FUNCT__ 1219 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1220 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1221 { 1222 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1223 PetscErrorCode ierr; 1224 1225 PetscFunctionBegin; 1226 /* do nondiagonal part */ 1227 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1228 /* send it on its way */ 1229 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1230 /* do local part */ 1231 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1232 /* receive remote parts */ 1233 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1234 PetscFunctionReturn(0); 1235 } 1236 1237 /* 1238 This only works correctly for square matrices where the subblock A->A is the 1239 diagonal block 1240 */ 1241 #undef __FUNCT__ 1242 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1243 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1244 { 1245 PetscErrorCode ierr; 1246 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1247 1248 PetscFunctionBegin; 1249 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1250 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1251 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1252 PetscFunctionReturn(0); 1253 } 1254 1255 #undef __FUNCT__ 1256 #define __FUNCT__ "MatScale_MPIAIJ" 1257 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1258 { 1259 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1260 PetscErrorCode ierr; 1261 1262 PetscFunctionBegin; 1263 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1264 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1265 PetscFunctionReturn(0); 1266 } 1267 1268 #undef __FUNCT__ 1269 #define __FUNCT__ "MatDestroy_MPIAIJ" 1270 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1271 { 1272 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1273 PetscErrorCode ierr; 1274 1275 PetscFunctionBegin; 1276 #if defined(PETSC_USE_LOG) 1277 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1278 #endif 1279 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1280 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1281 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1282 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1283 #if defined(PETSC_USE_CTABLE) 1284 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1285 #else 1286 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1287 #endif 1288 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1289 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1290 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1291 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1292 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1293 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1294 1295 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1296 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1297 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1298 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 1299 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1300 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1301 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1302 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1303 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1304 PetscFunctionReturn(0); 1305 } 1306 1307 #undef __FUNCT__ 1308 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1309 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1310 { 1311 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1312 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1313 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1314 PetscErrorCode ierr; 1315 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1316 int fd; 1317 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1318 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 1319 PetscScalar *column_values; 1320 PetscInt message_count,flowcontrolcount; 1321 FILE *file; 1322 1323 PetscFunctionBegin; 1324 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1325 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1326 nz = A->nz + B->nz; 1327 if (!rank) { 1328 header[0] = MAT_FILE_CLASSID; 1329 header[1] = mat->rmap->N; 1330 header[2] = mat->cmap->N; 1331 1332 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1333 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1334 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1335 /* get largest number of rows any processor has */ 1336 rlen = mat->rmap->n; 1337 range = mat->rmap->range; 1338 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1339 } else { 1340 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1341 rlen = mat->rmap->n; 1342 } 1343 1344 /* load up the local row counts */ 1345 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 1346 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1347 1348 /* store the row lengths to the file */ 1349 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1350 if (!rank) { 1351 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1352 for (i=1; i<size; i++) { 1353 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1354 rlen = range[i+1] - range[i]; 1355 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1356 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1357 } 1358 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1359 } else { 1360 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1361 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1362 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1363 } 1364 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1365 1366 /* load up the local column indices */ 1367 nzmax = nz; /* th processor needs space a largest processor needs */ 1368 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1369 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1370 cnt = 0; 1371 for (i=0; i<mat->rmap->n; i++) { 1372 for (j=B->i[i]; j<B->i[i+1]; j++) { 1373 if ((col = garray[B->j[j]]) > cstart) break; 1374 column_indices[cnt++] = col; 1375 } 1376 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1377 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1378 } 1379 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1380 1381 /* store the column indices to the file */ 1382 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1383 if (!rank) { 1384 MPI_Status status; 1385 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1386 for (i=1; i<size; i++) { 1387 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1388 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1389 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1390 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1391 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1392 } 1393 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1394 } else { 1395 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1396 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1397 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1398 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1399 } 1400 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1401 1402 /* load up the local column values */ 1403 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1404 cnt = 0; 1405 for (i=0; i<mat->rmap->n; i++) { 1406 for (j=B->i[i]; j<B->i[i+1]; j++) { 1407 if (garray[B->j[j]] > cstart) break; 1408 column_values[cnt++] = B->a[j]; 1409 } 1410 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1411 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1412 } 1413 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1414 1415 /* store the column values to the file */ 1416 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1417 if (!rank) { 1418 MPI_Status status; 1419 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1420 for (i=1; i<size; i++) { 1421 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1422 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1423 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1424 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1425 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1426 } 1427 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1428 } else { 1429 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1430 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1431 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1432 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1433 } 1434 ierr = PetscFree(column_values);CHKERRQ(ierr); 1435 1436 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1437 if (file) fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1438 PetscFunctionReturn(0); 1439 } 1440 1441 #include <petscdraw.h> 1442 #undef __FUNCT__ 1443 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1444 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1445 { 1446 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1447 PetscErrorCode ierr; 1448 PetscMPIInt rank = aij->rank,size = aij->size; 1449 PetscBool isdraw,iascii,isbinary; 1450 PetscViewer sviewer; 1451 PetscViewerFormat format; 1452 1453 PetscFunctionBegin; 1454 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1455 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1456 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1457 if (iascii) { 1458 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1459 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1460 MatInfo info; 1461 PetscBool inodes; 1462 1463 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1464 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1465 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1466 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1467 if (!inodes) { 1468 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1469 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1470 } else { 1471 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1472 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1473 } 1474 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1475 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1476 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1477 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1478 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1479 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1480 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1481 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1482 PetscFunctionReturn(0); 1483 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1484 PetscInt inodecount,inodelimit,*inodes; 1485 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1486 if (inodes) { 1487 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1488 } else { 1489 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1490 } 1491 PetscFunctionReturn(0); 1492 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1493 PetscFunctionReturn(0); 1494 } 1495 } else if (isbinary) { 1496 if (size == 1) { 1497 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1498 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1499 } else { 1500 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1501 } 1502 PetscFunctionReturn(0); 1503 } else if (isdraw) { 1504 PetscDraw draw; 1505 PetscBool isnull; 1506 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1507 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1508 } 1509 1510 if (size == 1) { 1511 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1512 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1513 } else { 1514 /* assemble the entire matrix onto first processor. */ 1515 Mat A; 1516 Mat_SeqAIJ *Aloc; 1517 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1518 MatScalar *a; 1519 1520 if (mat->rmap->N > 1024) { 1521 PetscBool flg = PETSC_FALSE; 1522 1523 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr); 1524 if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1525 } 1526 1527 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1528 if (!rank) { 1529 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1530 } else { 1531 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1532 } 1533 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1534 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1535 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1536 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1537 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1538 1539 /* copy over the A part */ 1540 Aloc = (Mat_SeqAIJ*)aij->A->data; 1541 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1542 row = mat->rmap->rstart; 1543 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1544 for (i=0; i<m; i++) { 1545 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1546 row++; 1547 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1548 } 1549 aj = Aloc->j; 1550 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1551 1552 /* copy over the B part */ 1553 Aloc = (Mat_SeqAIJ*)aij->B->data; 1554 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1555 row = mat->rmap->rstart; 1556 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1557 ct = cols; 1558 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1559 for (i=0; i<m; i++) { 1560 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1561 row++; 1562 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1563 } 1564 ierr = PetscFree(ct);CHKERRQ(ierr); 1565 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1566 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1567 /* 1568 Everyone has to call to draw the matrix since the graphics waits are 1569 synchronized across all processors that share the PetscDraw object 1570 */ 1571 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1572 if (!rank) { 1573 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1574 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1575 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1576 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1577 } 1578 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1579 ierr = MatDestroy(&A);CHKERRQ(ierr); 1580 } 1581 PetscFunctionReturn(0); 1582 } 1583 1584 #undef __FUNCT__ 1585 #define __FUNCT__ "MatView_MPIAIJ" 1586 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1587 { 1588 PetscErrorCode ierr; 1589 PetscBool iascii,isdraw,issocket,isbinary; 1590 1591 PetscFunctionBegin; 1592 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1593 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1594 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1595 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1596 if (iascii || isdraw || isbinary || issocket) { 1597 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1598 } 1599 PetscFunctionReturn(0); 1600 } 1601 1602 #undef __FUNCT__ 1603 #define __FUNCT__ "MatSOR_MPIAIJ" 1604 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1605 { 1606 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1607 PetscErrorCode ierr; 1608 Vec bb1 = 0; 1609 PetscBool hasop; 1610 1611 PetscFunctionBegin; 1612 if (flag == SOR_APPLY_UPPER) { 1613 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1614 PetscFunctionReturn(0); 1615 } 1616 1617 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1618 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1619 } 1620 1621 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1622 if (flag & SOR_ZERO_INITIAL_GUESS) { 1623 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1624 its--; 1625 } 1626 1627 while (its--) { 1628 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1629 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1630 1631 /* update rhs: bb1 = bb - B*x */ 1632 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1633 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1634 1635 /* local sweep */ 1636 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1637 } 1638 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1639 if (flag & SOR_ZERO_INITIAL_GUESS) { 1640 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1641 its--; 1642 } 1643 while (its--) { 1644 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1645 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1646 1647 /* update rhs: bb1 = bb - B*x */ 1648 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1649 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1650 1651 /* local sweep */ 1652 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1653 } 1654 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1655 if (flag & SOR_ZERO_INITIAL_GUESS) { 1656 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1657 its--; 1658 } 1659 while (its--) { 1660 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1661 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1662 1663 /* update rhs: bb1 = bb - B*x */ 1664 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1665 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1666 1667 /* local sweep */ 1668 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1669 } 1670 } else if (flag & SOR_EISENSTAT) { 1671 Vec xx1; 1672 1673 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1674 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1675 1676 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1677 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1678 if (!mat->diag) { 1679 ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1680 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1681 } 1682 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1683 if (hasop) { 1684 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1685 } else { 1686 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1687 } 1688 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1689 1690 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1691 1692 /* local sweep */ 1693 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1694 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1695 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1696 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1697 1698 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1699 PetscFunctionReturn(0); 1700 } 1701 1702 #undef __FUNCT__ 1703 #define __FUNCT__ "MatPermute_MPIAIJ" 1704 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1705 { 1706 Mat aA,aB,Aperm; 1707 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1708 PetscScalar *aa,*ba; 1709 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1710 PetscSF rowsf,sf; 1711 IS parcolp = NULL; 1712 PetscBool done; 1713 PetscErrorCode ierr; 1714 1715 PetscFunctionBegin; 1716 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1717 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1718 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1719 ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr); 1720 1721 /* Invert row permutation to find out where my rows should go */ 1722 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1723 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1724 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1725 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1726 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1727 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1728 1729 /* Invert column permutation to find out where my columns should go */ 1730 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1731 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1732 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1733 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1734 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1735 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1736 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1737 1738 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1739 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1740 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1741 1742 /* Find out where my gcols should go */ 1743 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1744 ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr); 1745 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1746 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1747 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1748 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1749 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1750 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1751 1752 ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr); 1753 ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1754 ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1755 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1756 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1757 for (i=0; i<m; i++) { 1758 PetscInt row = rdest[i],rowner; 1759 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1760 for (j=ai[i]; j<ai[i+1]; j++) { 1761 PetscInt cowner,col = cdest[aj[j]]; 1762 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1763 if (rowner == cowner) dnnz[i]++; 1764 else onnz[i]++; 1765 } 1766 for (j=bi[i]; j<bi[i+1]; j++) { 1767 PetscInt cowner,col = gcdest[bj[j]]; 1768 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1769 if (rowner == cowner) dnnz[i]++; 1770 else onnz[i]++; 1771 } 1772 } 1773 ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1774 ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1775 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1776 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1777 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1778 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1779 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1780 1781 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1782 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1783 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1784 for (i=0; i<m; i++) { 1785 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1786 PetscInt rowlen; 1787 rowlen = ai[i+1] - ai[i]; 1788 for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]]; 1789 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr); 1790 rowlen = bi[i+1] - bi[i]; 1791 for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]]; 1792 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr); 1793 } 1794 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1795 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1796 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1797 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1798 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1799 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1800 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1801 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1802 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1803 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1804 *B = Aperm; 1805 PetscFunctionReturn(0); 1806 } 1807 1808 #undef __FUNCT__ 1809 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1810 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1811 { 1812 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1813 Mat A = mat->A,B = mat->B; 1814 PetscErrorCode ierr; 1815 PetscReal isend[5],irecv[5]; 1816 1817 PetscFunctionBegin; 1818 info->block_size = 1.0; 1819 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1820 1821 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1822 isend[3] = info->memory; isend[4] = info->mallocs; 1823 1824 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1825 1826 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1827 isend[3] += info->memory; isend[4] += info->mallocs; 1828 if (flag == MAT_LOCAL) { 1829 info->nz_used = isend[0]; 1830 info->nz_allocated = isend[1]; 1831 info->nz_unneeded = isend[2]; 1832 info->memory = isend[3]; 1833 info->mallocs = isend[4]; 1834 } else if (flag == MAT_GLOBAL_MAX) { 1835 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1836 1837 info->nz_used = irecv[0]; 1838 info->nz_allocated = irecv[1]; 1839 info->nz_unneeded = irecv[2]; 1840 info->memory = irecv[3]; 1841 info->mallocs = irecv[4]; 1842 } else if (flag == MAT_GLOBAL_SUM) { 1843 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1844 1845 info->nz_used = irecv[0]; 1846 info->nz_allocated = irecv[1]; 1847 info->nz_unneeded = irecv[2]; 1848 info->memory = irecv[3]; 1849 info->mallocs = irecv[4]; 1850 } 1851 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1852 info->fill_ratio_needed = 0; 1853 info->factor_mallocs = 0; 1854 PetscFunctionReturn(0); 1855 } 1856 1857 #undef __FUNCT__ 1858 #define __FUNCT__ "MatSetOption_MPIAIJ" 1859 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1860 { 1861 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1862 PetscErrorCode ierr; 1863 1864 PetscFunctionBegin; 1865 switch (op) { 1866 case MAT_NEW_NONZERO_LOCATIONS: 1867 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1868 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1869 case MAT_KEEP_NONZERO_PATTERN: 1870 case MAT_NEW_NONZERO_LOCATION_ERR: 1871 case MAT_USE_INODES: 1872 case MAT_IGNORE_ZERO_ENTRIES: 1873 MatCheckPreallocated(A,1); 1874 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1875 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1876 break; 1877 case MAT_ROW_ORIENTED: 1878 a->roworiented = flg; 1879 1880 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1881 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1882 break; 1883 case MAT_NEW_DIAGONALS: 1884 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1885 break; 1886 case MAT_IGNORE_OFF_PROC_ENTRIES: 1887 a->donotstash = flg; 1888 break; 1889 case MAT_SPD: 1890 A->spd_set = PETSC_TRUE; 1891 A->spd = flg; 1892 if (flg) { 1893 A->symmetric = PETSC_TRUE; 1894 A->structurally_symmetric = PETSC_TRUE; 1895 A->symmetric_set = PETSC_TRUE; 1896 A->structurally_symmetric_set = PETSC_TRUE; 1897 } 1898 break; 1899 case MAT_SYMMETRIC: 1900 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1901 break; 1902 case MAT_STRUCTURALLY_SYMMETRIC: 1903 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1904 break; 1905 case MAT_HERMITIAN: 1906 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1907 break; 1908 case MAT_SYMMETRY_ETERNAL: 1909 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1910 break; 1911 default: 1912 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1913 } 1914 PetscFunctionReturn(0); 1915 } 1916 1917 #undef __FUNCT__ 1918 #define __FUNCT__ "MatGetRow_MPIAIJ" 1919 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1920 { 1921 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1922 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1923 PetscErrorCode ierr; 1924 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1925 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1926 PetscInt *cmap,*idx_p; 1927 1928 PetscFunctionBegin; 1929 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1930 mat->getrowactive = PETSC_TRUE; 1931 1932 if (!mat->rowvalues && (idx || v)) { 1933 /* 1934 allocate enough space to hold information from the longest row. 1935 */ 1936 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1937 PetscInt max = 1,tmp; 1938 for (i=0; i<matin->rmap->n; i++) { 1939 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1940 if (max < tmp) max = tmp; 1941 } 1942 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1943 } 1944 1945 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1946 lrow = row - rstart; 1947 1948 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1949 if (!v) {pvA = 0; pvB = 0;} 1950 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1951 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1952 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1953 nztot = nzA + nzB; 1954 1955 cmap = mat->garray; 1956 if (v || idx) { 1957 if (nztot) { 1958 /* Sort by increasing column numbers, assuming A and B already sorted */ 1959 PetscInt imark = -1; 1960 if (v) { 1961 *v = v_p = mat->rowvalues; 1962 for (i=0; i<nzB; i++) { 1963 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1964 else break; 1965 } 1966 imark = i; 1967 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1968 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1969 } 1970 if (idx) { 1971 *idx = idx_p = mat->rowindices; 1972 if (imark > -1) { 1973 for (i=0; i<imark; i++) { 1974 idx_p[i] = cmap[cworkB[i]]; 1975 } 1976 } else { 1977 for (i=0; i<nzB; i++) { 1978 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1979 else break; 1980 } 1981 imark = i; 1982 } 1983 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1984 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1985 } 1986 } else { 1987 if (idx) *idx = 0; 1988 if (v) *v = 0; 1989 } 1990 } 1991 *nz = nztot; 1992 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1993 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1994 PetscFunctionReturn(0); 1995 } 1996 1997 #undef __FUNCT__ 1998 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1999 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 2000 { 2001 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2002 2003 PetscFunctionBegin; 2004 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 2005 aij->getrowactive = PETSC_FALSE; 2006 PetscFunctionReturn(0); 2007 } 2008 2009 #undef __FUNCT__ 2010 #define __FUNCT__ "MatNorm_MPIAIJ" 2011 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 2012 { 2013 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2014 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 2015 PetscErrorCode ierr; 2016 PetscInt i,j,cstart = mat->cmap->rstart; 2017 PetscReal sum = 0.0; 2018 MatScalar *v; 2019 2020 PetscFunctionBegin; 2021 if (aij->size == 1) { 2022 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 2023 } else { 2024 if (type == NORM_FROBENIUS) { 2025 v = amat->a; 2026 for (i=0; i<amat->nz; i++) { 2027 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2028 } 2029 v = bmat->a; 2030 for (i=0; i<bmat->nz; i++) { 2031 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2032 } 2033 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2034 *norm = PetscSqrtReal(*norm); 2035 } else if (type == NORM_1) { /* max column norm */ 2036 PetscReal *tmp,*tmp2; 2037 PetscInt *jj,*garray = aij->garray; 2038 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 2039 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 2040 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 2041 *norm = 0.0; 2042 v = amat->a; jj = amat->j; 2043 for (j=0; j<amat->nz; j++) { 2044 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 2045 } 2046 v = bmat->a; jj = bmat->j; 2047 for (j=0; j<bmat->nz; j++) { 2048 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 2049 } 2050 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2051 for (j=0; j<mat->cmap->N; j++) { 2052 if (tmp2[j] > *norm) *norm = tmp2[j]; 2053 } 2054 ierr = PetscFree(tmp);CHKERRQ(ierr); 2055 ierr = PetscFree(tmp2);CHKERRQ(ierr); 2056 } else if (type == NORM_INFINITY) { /* max row norm */ 2057 PetscReal ntemp = 0.0; 2058 for (j=0; j<aij->A->rmap->n; j++) { 2059 v = amat->a + amat->i[j]; 2060 sum = 0.0; 2061 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2062 sum += PetscAbsScalar(*v); v++; 2063 } 2064 v = bmat->a + bmat->i[j]; 2065 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2066 sum += PetscAbsScalar(*v); v++; 2067 } 2068 if (sum > ntemp) ntemp = sum; 2069 } 2070 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2071 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 2072 } 2073 PetscFunctionReturn(0); 2074 } 2075 2076 #undef __FUNCT__ 2077 #define __FUNCT__ "MatTranspose_MPIAIJ" 2078 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2079 { 2080 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2081 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2082 PetscErrorCode ierr; 2083 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 2084 PetscInt cstart = A->cmap->rstart,ncol; 2085 Mat B; 2086 MatScalar *array; 2087 2088 PetscFunctionBegin; 2089 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2090 2091 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2092 ai = Aloc->i; aj = Aloc->j; 2093 bi = Bloc->i; bj = Bloc->j; 2094 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2095 PetscInt *d_nnz,*g_nnz,*o_nnz; 2096 PetscSFNode *oloc; 2097 PETSC_UNUSED PetscSF sf; 2098 2099 ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr); 2100 /* compute d_nnz for preallocation */ 2101 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2102 for (i=0; i<ai[ma]; i++) { 2103 d_nnz[aj[i]]++; 2104 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2105 } 2106 /* compute local off-diagonal contributions */ 2107 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 2108 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2109 /* map those to global */ 2110 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2111 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2112 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2113 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2114 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2115 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2116 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2117 2118 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2119 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2120 ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr); 2121 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2122 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2123 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2124 } else { 2125 B = *matout; 2126 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2127 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2128 } 2129 2130 /* copy over the A part */ 2131 array = Aloc->a; 2132 row = A->rmap->rstart; 2133 for (i=0; i<ma; i++) { 2134 ncol = ai[i+1]-ai[i]; 2135 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2136 row++; 2137 array += ncol; aj += ncol; 2138 } 2139 aj = Aloc->j; 2140 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2141 2142 /* copy over the B part */ 2143 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2144 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2145 array = Bloc->a; 2146 row = A->rmap->rstart; 2147 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2148 cols_tmp = cols; 2149 for (i=0; i<mb; i++) { 2150 ncol = bi[i+1]-bi[i]; 2151 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2152 row++; 2153 array += ncol; cols_tmp += ncol; 2154 } 2155 ierr = PetscFree(cols);CHKERRQ(ierr); 2156 2157 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2158 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2159 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2160 *matout = B; 2161 } else { 2162 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2163 } 2164 PetscFunctionReturn(0); 2165 } 2166 2167 #undef __FUNCT__ 2168 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2169 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2170 { 2171 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2172 Mat a = aij->A,b = aij->B; 2173 PetscErrorCode ierr; 2174 PetscInt s1,s2,s3; 2175 2176 PetscFunctionBegin; 2177 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2178 if (rr) { 2179 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2180 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2181 /* Overlap communication with computation. */ 2182 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2183 } 2184 if (ll) { 2185 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2186 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2187 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2188 } 2189 /* scale the diagonal block */ 2190 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2191 2192 if (rr) { 2193 /* Do a scatter end and then right scale the off-diagonal block */ 2194 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2195 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2196 } 2197 PetscFunctionReturn(0); 2198 } 2199 2200 #undef __FUNCT__ 2201 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2202 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2203 { 2204 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2205 PetscErrorCode ierr; 2206 2207 PetscFunctionBegin; 2208 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2209 PetscFunctionReturn(0); 2210 } 2211 2212 #undef __FUNCT__ 2213 #define __FUNCT__ "MatEqual_MPIAIJ" 2214 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2215 { 2216 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2217 Mat a,b,c,d; 2218 PetscBool flg; 2219 PetscErrorCode ierr; 2220 2221 PetscFunctionBegin; 2222 a = matA->A; b = matA->B; 2223 c = matB->A; d = matB->B; 2224 2225 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2226 if (flg) { 2227 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2228 } 2229 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2230 PetscFunctionReturn(0); 2231 } 2232 2233 #undef __FUNCT__ 2234 #define __FUNCT__ "MatCopy_MPIAIJ" 2235 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2236 { 2237 PetscErrorCode ierr; 2238 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2239 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2240 2241 PetscFunctionBegin; 2242 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2243 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2244 /* because of the column compression in the off-processor part of the matrix a->B, 2245 the number of columns in a->B and b->B may be different, hence we cannot call 2246 the MatCopy() directly on the two parts. If need be, we can provide a more 2247 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2248 then copying the submatrices */ 2249 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2250 } else { 2251 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2252 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2253 } 2254 PetscFunctionReturn(0); 2255 } 2256 2257 #undef __FUNCT__ 2258 #define __FUNCT__ "MatSetUp_MPIAIJ" 2259 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2260 { 2261 PetscErrorCode ierr; 2262 2263 PetscFunctionBegin; 2264 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2265 PetscFunctionReturn(0); 2266 } 2267 2268 #undef __FUNCT__ 2269 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2270 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2271 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2272 { 2273 PetscInt i,m=Y->rmap->N; 2274 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2275 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2276 const PetscInt *xi = x->i,*yi = y->i; 2277 2278 PetscFunctionBegin; 2279 /* Set the number of nonzeros in the new matrix */ 2280 for (i=0; i<m; i++) { 2281 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2282 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2283 nnz[i] = 0; 2284 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2285 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2286 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2287 nnz[i]++; 2288 } 2289 for (; k<nzy; k++) nnz[i]++; 2290 } 2291 PetscFunctionReturn(0); 2292 } 2293 2294 #undef __FUNCT__ 2295 #define __FUNCT__ "MatAXPY_MPIAIJ" 2296 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2297 { 2298 PetscErrorCode ierr; 2299 PetscInt i; 2300 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2301 PetscBLASInt bnz,one=1; 2302 Mat_SeqAIJ *x,*y; 2303 2304 PetscFunctionBegin; 2305 if (str == SAME_NONZERO_PATTERN) { 2306 PetscScalar alpha = a; 2307 x = (Mat_SeqAIJ*)xx->A->data; 2308 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2309 y = (Mat_SeqAIJ*)yy->A->data; 2310 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2311 x = (Mat_SeqAIJ*)xx->B->data; 2312 y = (Mat_SeqAIJ*)yy->B->data; 2313 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2314 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2315 } else if (str == SUBSET_NONZERO_PATTERN) { 2316 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2317 2318 x = (Mat_SeqAIJ*)xx->B->data; 2319 y = (Mat_SeqAIJ*)yy->B->data; 2320 if (y->xtoy && y->XtoY != xx->B) { 2321 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2322 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2323 } 2324 if (!y->xtoy) { /* get xtoy */ 2325 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2326 y->XtoY = xx->B; 2327 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2328 } 2329 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2330 } else { 2331 Mat B; 2332 PetscInt *nnz_d,*nnz_o; 2333 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2334 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2335 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2336 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2337 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2338 ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr); 2339 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2340 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2341 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2342 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2343 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2344 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2345 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2346 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2347 } 2348 PetscFunctionReturn(0); 2349 } 2350 2351 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2352 2353 #undef __FUNCT__ 2354 #define __FUNCT__ "MatConjugate_MPIAIJ" 2355 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2356 { 2357 #if defined(PETSC_USE_COMPLEX) 2358 PetscErrorCode ierr; 2359 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2360 2361 PetscFunctionBegin; 2362 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2363 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2364 #else 2365 PetscFunctionBegin; 2366 #endif 2367 PetscFunctionReturn(0); 2368 } 2369 2370 #undef __FUNCT__ 2371 #define __FUNCT__ "MatRealPart_MPIAIJ" 2372 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2373 { 2374 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2375 PetscErrorCode ierr; 2376 2377 PetscFunctionBegin; 2378 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2379 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2380 PetscFunctionReturn(0); 2381 } 2382 2383 #undef __FUNCT__ 2384 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2385 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2386 { 2387 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2388 PetscErrorCode ierr; 2389 2390 PetscFunctionBegin; 2391 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2392 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2393 PetscFunctionReturn(0); 2394 } 2395 2396 #if defined(PETSC_HAVE_PBGL) 2397 2398 #include <boost/parallel/mpi/bsp_process_group.hpp> 2399 #include <boost/graph/distributed/ilu_default_graph.hpp> 2400 #include <boost/graph/distributed/ilu_0_block.hpp> 2401 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2402 #include <boost/graph/distributed/petsc/interface.hpp> 2403 #include <boost/multi_array.hpp> 2404 #include <boost/parallel/distributed_property_map->hpp> 2405 2406 #undef __FUNCT__ 2407 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2408 /* 2409 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2410 */ 2411 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2412 { 2413 namespace petsc = boost::distributed::petsc; 2414 2415 namespace graph_dist = boost::graph::distributed; 2416 using boost::graph::distributed::ilu_default::process_group_type; 2417 using boost::graph::ilu_permuted; 2418 2419 PetscBool row_identity, col_identity; 2420 PetscContainer c; 2421 PetscInt m, n, M, N; 2422 PetscErrorCode ierr; 2423 2424 PetscFunctionBegin; 2425 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2426 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2427 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2428 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2429 2430 process_group_type pg; 2431 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2432 lgraph_type *lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2433 lgraph_type& level_graph = *lgraph_p; 2434 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2435 2436 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2437 ilu_permuted(level_graph); 2438 2439 /* put together the new matrix */ 2440 ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr); 2441 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2442 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2443 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2444 ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 2445 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2446 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2447 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2448 2449 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c); 2450 ierr = PetscContainerSetPointer(c, lgraph_p); 2451 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2452 ierr = PetscContainerDestroy(&c); 2453 PetscFunctionReturn(0); 2454 } 2455 2456 #undef __FUNCT__ 2457 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2458 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2459 { 2460 PetscFunctionBegin; 2461 PetscFunctionReturn(0); 2462 } 2463 2464 #undef __FUNCT__ 2465 #define __FUNCT__ "MatSolve_MPIAIJ" 2466 /* 2467 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2468 */ 2469 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2470 { 2471 namespace graph_dist = boost::graph::distributed; 2472 2473 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2474 lgraph_type *lgraph_p; 2475 PetscContainer c; 2476 PetscErrorCode ierr; 2477 2478 PetscFunctionBegin; 2479 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr); 2480 ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr); 2481 ierr = VecCopy(b, x);CHKERRQ(ierr); 2482 2483 PetscScalar *array_x; 2484 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2485 PetscInt sx; 2486 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2487 2488 PetscScalar *array_b; 2489 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2490 PetscInt sb; 2491 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2492 2493 lgraph_type& level_graph = *lgraph_p; 2494 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2495 2496 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2497 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]); 2498 array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]); 2499 2500 typedef boost::iterator_property_map<array_ref_type::iterator, 2501 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2502 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)); 2503 gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2504 2505 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2506 PetscFunctionReturn(0); 2507 } 2508 #endif 2509 2510 #undef __FUNCT__ 2511 #define __FUNCT__ "MatDestroy_MatRedundant" 2512 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2513 { 2514 PetscErrorCode ierr; 2515 Mat_Redundant *redund; 2516 PetscInt i; 2517 PetscMPIInt size; 2518 2519 PetscFunctionBegin; 2520 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2521 if (size == 1) { 2522 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2523 redund = a->redundant; 2524 } else { 2525 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2526 redund = a->redundant; 2527 } 2528 if (redund){ 2529 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2530 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2531 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2532 for (i=0; i<redund->nrecvs; i++) { 2533 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2534 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2535 } 2536 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2537 2538 if (redund->psubcomm) { 2539 ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr); 2540 } 2541 2542 ierr = redund->Destroy(A);CHKERRQ(ierr); 2543 ierr = PetscFree(redund);CHKERRQ(ierr); 2544 } 2545 PetscFunctionReturn(0); 2546 } 2547 2548 #undef __FUNCT__ 2549 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_psubcomm" 2550 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_psubcomm(Mat mat,PetscInt nsubcomm,PetscSubcomm psubcomm,MatReuse reuse,Mat *matredundant) 2551 { 2552 PetscMPIInt rank,size; 2553 MPI_Comm comm,subcomm=psubcomm->comm; 2554 PetscErrorCode ierr; 2555 PetscInt nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N; 2556 PetscMPIInt *send_rank= NULL,*recv_rank=NULL,subrank,subsize; 2557 PetscInt *rowrange = mat->rmap->range; 2558 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2559 Mat A = aij->A,B=aij->B,C=*matredundant; 2560 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2561 PetscScalar *sbuf_a; 2562 PetscInt nzlocal=a->nz+b->nz; 2563 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2564 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray; 2565 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2566 MatScalar *aworkA,*aworkB; 2567 PetscScalar *vals; 2568 PetscMPIInt tag1,tag2,tag3,imdex; 2569 MPI_Request *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL; 2570 MPI_Request *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL; 2571 MPI_Status recv_status,*send_status; 2572 PetscInt *sbuf_nz=NULL,*rbuf_nz=NULL,count; 2573 PetscInt **rbuf_j=NULL; 2574 PetscScalar **rbuf_a=NULL; 2575 Mat_Redundant *redund =NULL; 2576 2577 PetscFunctionBegin; 2578 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2579 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2580 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2581 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2582 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2583 2584 if (reuse == MAT_REUSE_MATRIX) { 2585 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2586 if (subsize == 1) { 2587 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2588 redund = c->redundant; 2589 } else { 2590 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2591 redund = c->redundant; 2592 } 2593 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2594 2595 nsends = redund->nsends; 2596 nrecvs = redund->nrecvs; 2597 send_rank = redund->send_rank; 2598 recv_rank = redund->recv_rank; 2599 sbuf_nz = redund->sbuf_nz; 2600 rbuf_nz = redund->rbuf_nz; 2601 sbuf_j = redund->sbuf_j; 2602 sbuf_a = redund->sbuf_a; 2603 rbuf_j = redund->rbuf_j; 2604 rbuf_a = redund->rbuf_a; 2605 } 2606 2607 if (reuse == MAT_INITIAL_MATRIX) { 2608 PetscInt nleftover,np_subcomm; 2609 2610 /* get the destination processors' id send_rank, nsends and nrecvs */ 2611 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr); 2612 2613 np_subcomm = size/nsubcomm; 2614 nleftover = size - nsubcomm*np_subcomm; 2615 2616 nsends = 0; nrecvs = 0; 2617 if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) { 2618 /* -------------------------------------------*/ 2619 for (i=0; i<size; i++) { 2620 if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */ 2621 send_rank[nsends] = i; nsends++; 2622 recv_rank[nrecvs++] = i; 2623 /* printf("[%d] send to and recv from [%d]\n",rank,i); */ 2624 } 2625 } 2626 if (rank >= size - nleftover) { /* this proc is a leftover processor */ 2627 i = size-nleftover-1; 2628 j = 0; 2629 while (j < nsubcomm - nleftover) { 2630 send_rank[nsends++] = i; 2631 i--; j++; 2632 /* printf("[%d] send to [%d]\n",rank,i); */ 2633 } 2634 } 2635 2636 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 2637 for (i=0; i<nleftover; i++) { 2638 recv_rank[nrecvs++] = size-nleftover+i; 2639 /* printf("[%d] recv from [%d]\n",rank,i); */ 2640 } 2641 } 2642 } else if (psubcomm->type == PETSC_SUBCOMM_CONTIGUOUS) { 2643 /* --------------------------------------------------*/ 2644 /* ---------- new imples: use MatGetSubMatrices() ------------*/ 2645 PetscBool flg=PETSC_FALSE; 2646 ierr = PetscOptionsGetBool(NULL,"-new",&flg,NULL);CHKERRQ(ierr); 2647 if (flg) { 2648 Mat *matseq; 2649 IS isrow,iscol; 2650 PetscInt mloc_sub,rstart,rend; 2651 2652 /* create redundant matrix */ 2653 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2654 ierr = MatSetSizes(C,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2655 ierr = MatSetUp(C);CHKERRQ(ierr); 2656 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2657 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2658 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2659 2660 ierr = MatGetOwnershipRange(C,&rstart,&rend);CHKERRQ(ierr); 2661 mloc_sub = rend - rstart; 2662 2663 printf("[%d] Use MatGetSubMatrices()...rows %d - %d, mloc_sub %d\n",rank,rstart,rend,mloc_sub); 2664 ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr); 2665 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr); 2666 2667 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&matseq);CHKERRQ(ierr); 2668 if (rank==1) { 2669 printf("[%d] matsub:\n",rank); 2670 ierr = MatView(matseq[0],PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 2671 } 2672 2673 /* Create redundant matrix by concatenating local sequential 2674 matrices from processors in this subcomm */ 2675 ierr = MatDestroy(&C);CHKERRQ(ierr); 2676 2677 ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr); 2678 *matredundant = C; 2679 2680 if (nsubcomm == 1) { 2681 if (!rank) printf( "C\n"); 2682 ierr = MatView(C,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 2683 } 2684 2685 ierr = MatDestroy(&matseq[0]);CHKERRQ(ierr); 2686 ierr = PetscFree(matseq);CHKERRQ(ierr); 2687 ierr = ISDestroy(&isrow);CHKERRQ(ierr); 2688 ierr = ISDestroy(&iscol);CHKERRQ(ierr); 2689 //ierr = MPI_Barrier(comm);CHKERRQ(ierr); 2690 //SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No done yet"); 2691 PetscFunctionReturn(0); 2692 } 2693 2694 /*---------------------------------- */ 2695 PetscInt color,subcommstart; 2696 subcommstart=0; 2697 for (color=0; color<nsubcomm; color++) { 2698 if (psubcomm->color != color) { 2699 for (i=0; i<psubcomm->subsize[color]; i++) { 2700 if (subrank == i) { /* my_subrank == other's subrank */ 2701 send_rank[nsends++] = subcommstart+i; 2702 recv_rank[nrecvs++] = subcommstart+i; 2703 /* printf("[%d] send to and recv from [%d]\n",rank,subcommstart+i); */ 2704 } 2705 } 2706 } 2707 subcommstart += psubcomm->subsize[color]; 2708 } 2709 if (nleftover && subrank == size/nsubcomm) { /* this proc is a leftover proc, send to subcomm that does not have leftover proc */ 2710 subcommstart=0; 2711 for (color=0; color<nsubcomm; color++) { 2712 subcommstart += psubcomm->subsize[color]; 2713 if (psubcomm->color == color) continue; 2714 if (psubcomm->subsize[color] == size/nsubcomm) { /* subcomm does not have leftover proc */ 2715 send_rank[nsends++] = subcommstart -1; /* send to the last proc of subcomm[color] */ 2716 /* printf("[%d] leftover send to [%d] \n",rank,subcommstart -1); */ 2717 } 2718 } 2719 } 2720 2721 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 2722 subcommstart=0; 2723 for (color=0; color<nsubcomm; color++) { 2724 subcommstart += psubcomm->subsize[color]; 2725 if (psubcomm->subsize[color] > size/nsubcomm) { /* subcomm has leftover proc */ 2726 recv_rank[nrecvs++] = subcommstart -1; /* recv from the last proc of subcomm[color] */ 2727 /* printf("[%d] recv from [%d]\n",rank,subcommstart -1); */ 2728 } 2729 } 2730 } 2731 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for PetscSubcomm type %D",psubcomm->type); 2732 2733 /* allocate sbuf_j, sbuf_a */ 2734 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2735 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2736 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2737 /* 2738 ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr); 2739 ierr = PetscSynchronizedFlush(comm);CHKERRQ(ierr); 2740 */ 2741 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2742 2743 /* copy mat's local entries into the buffers */ 2744 if (reuse == MAT_INITIAL_MATRIX) { 2745 rownz_max = 0; 2746 rptr = sbuf_j; 2747 cols = sbuf_j + rend-rstart + 1; 2748 vals = sbuf_a; 2749 rptr[0] = 0; 2750 for (i=0; i<rend-rstart; i++) { 2751 row = i + rstart; 2752 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2753 ncols = nzA + nzB; 2754 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2755 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2756 /* load the column indices for this row into cols */ 2757 lwrite = 0; 2758 for (l=0; l<nzB; l++) { 2759 if ((ctmp = bmap[cworkB[l]]) < cstart) { 2760 vals[lwrite] = aworkB[l]; 2761 cols[lwrite++] = ctmp; 2762 } 2763 } 2764 for (l=0; l<nzA; l++) { 2765 vals[lwrite] = aworkA[l]; 2766 cols[lwrite++] = cstart + cworkA[l]; 2767 } 2768 for (l=0; l<nzB; l++) { 2769 if ((ctmp = bmap[cworkB[l]]) >= cend) { 2770 vals[lwrite] = aworkB[l]; 2771 cols[lwrite++] = ctmp; 2772 } 2773 } 2774 vals += ncols; 2775 cols += ncols; 2776 rptr[i+1] = rptr[i] + ncols; 2777 if (rownz_max < ncols) rownz_max = ncols; 2778 } 2779 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2780 } else { /* only copy matrix values into sbuf_a */ 2781 rptr = sbuf_j; 2782 vals = sbuf_a; 2783 rptr[0] = 0; 2784 for (i=0; i<rend-rstart; i++) { 2785 row = i + rstart; 2786 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2787 ncols = nzA + nzB; 2788 cworkB = b->j + b->i[i]; 2789 aworkA = a->a + a->i[i]; 2790 aworkB = b->a + b->i[i]; 2791 lwrite = 0; 2792 for (l=0; l<nzB; l++) { 2793 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2794 } 2795 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2796 for (l=0; l<nzB; l++) { 2797 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2798 } 2799 vals += ncols; 2800 rptr[i+1] = rptr[i] + ncols; 2801 } 2802 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2803 2804 /* send nzlocal to others, and recv other's nzlocal */ 2805 /*--------------------------------------------------*/ 2806 if (reuse == MAT_INITIAL_MATRIX) { 2807 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2808 2809 s_waits2 = s_waits3 + nsends; 2810 s_waits1 = s_waits2 + nsends; 2811 r_waits1 = s_waits1 + nsends; 2812 r_waits2 = r_waits1 + nrecvs; 2813 r_waits3 = r_waits2 + nrecvs; 2814 } else { 2815 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2816 2817 r_waits3 = s_waits3 + nsends; 2818 } 2819 2820 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2821 if (reuse == MAT_INITIAL_MATRIX) { 2822 /* get new tags to keep the communication clean */ 2823 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2824 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2825 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2826 2827 /* post receives of other's nzlocal */ 2828 for (i=0; i<nrecvs; i++) { 2829 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2830 } 2831 /* send nzlocal to others */ 2832 for (i=0; i<nsends; i++) { 2833 sbuf_nz[i] = nzlocal; 2834 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2835 } 2836 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2837 count = nrecvs; 2838 while (count) { 2839 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2840 2841 recv_rank[imdex] = recv_status.MPI_SOURCE; 2842 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2843 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2844 2845 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2846 2847 rbuf_nz[imdex] += i + 2; 2848 2849 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2850 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2851 count--; 2852 } 2853 /* wait on sends of nzlocal */ 2854 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2855 /* send mat->i,j to others, and recv from other's */ 2856 /*------------------------------------------------*/ 2857 for (i=0; i<nsends; i++) { 2858 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2859 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2860 } 2861 /* wait on receives of mat->i,j */ 2862 /*------------------------------*/ 2863 count = nrecvs; 2864 while (count) { 2865 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2866 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2867 count--; 2868 } 2869 /* wait on sends of mat->i,j */ 2870 /*---------------------------*/ 2871 if (nsends) { 2872 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2873 } 2874 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2875 2876 /* post receives, send and receive mat->a */ 2877 /*----------------------------------------*/ 2878 for (imdex=0; imdex<nrecvs; imdex++) { 2879 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2880 } 2881 for (i=0; i<nsends; i++) { 2882 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2883 } 2884 count = nrecvs; 2885 while (count) { 2886 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2887 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2888 count--; 2889 } 2890 if (nsends) { 2891 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2892 } 2893 2894 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2895 2896 /* create redundant matrix */ 2897 /*-------------------------*/ 2898 if (reuse == MAT_INITIAL_MATRIX) { 2899 const PetscInt *range; 2900 PetscInt rstart_sub,rend_sub,mloc_sub; 2901 2902 /* compute rownz_max for preallocation */ 2903 for (imdex=0; imdex<nrecvs; imdex++) { 2904 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2905 rptr = rbuf_j[imdex]; 2906 for (i=0; i<j; i++) { 2907 ncols = rptr[i+1] - rptr[i]; 2908 if (rownz_max < ncols) rownz_max = ncols; 2909 } 2910 } 2911 2912 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2913 2914 /* get local size of redundant matrix 2915 - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED, works for other types, but may not efficient! */ 2916 ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr); 2917 rstart_sub = range[nsubcomm*subrank]; 2918 if (subrank+1 < subsize) { /* not the last proc in subcomm */ 2919 rend_sub = range[nsubcomm*(subrank+1)]; 2920 } else { 2921 rend_sub = mat->rmap->N; 2922 } 2923 mloc_sub = rend_sub - rstart_sub; 2924 2925 if (M == N) { 2926 ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2927 } else { /* non-square matrix */ 2928 ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr); 2929 } 2930 ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr); 2931 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2932 ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr); 2933 ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr); 2934 } else { 2935 C = *matredundant; 2936 } 2937 2938 /* insert local matrix entries */ 2939 rptr = sbuf_j; 2940 cols = sbuf_j + rend-rstart + 1; 2941 vals = sbuf_a; 2942 for (i=0; i<rend-rstart; i++) { 2943 row = i + rstart; 2944 ncols = rptr[i+1] - rptr[i]; 2945 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2946 vals += ncols; 2947 cols += ncols; 2948 } 2949 /* insert received matrix entries */ 2950 for (imdex=0; imdex<nrecvs; imdex++) { 2951 rstart = rowrange[recv_rank[imdex]]; 2952 rend = rowrange[recv_rank[imdex]+1]; 2953 /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */ 2954 rptr = rbuf_j[imdex]; 2955 cols = rbuf_j[imdex] + rend-rstart + 1; 2956 vals = rbuf_a[imdex]; 2957 for (i=0; i<rend-rstart; i++) { 2958 row = i + rstart; 2959 ncols = rptr[i+1] - rptr[i]; 2960 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2961 vals += ncols; 2962 cols += ncols; 2963 } 2964 } 2965 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2966 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2967 2968 if (reuse == MAT_INITIAL_MATRIX) { 2969 *matredundant = C; 2970 2971 /* create a supporting struct and attach it to C for reuse */ 2972 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2973 if (subsize == 1) { 2974 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2975 c->redundant = redund; 2976 } else { 2977 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2978 c->redundant = redund; 2979 } 2980 2981 redund->nzlocal = nzlocal; 2982 redund->nsends = nsends; 2983 redund->nrecvs = nrecvs; 2984 redund->send_rank = send_rank; 2985 redund->recv_rank = recv_rank; 2986 redund->sbuf_nz = sbuf_nz; 2987 redund->rbuf_nz = rbuf_nz; 2988 redund->sbuf_j = sbuf_j; 2989 redund->sbuf_a = sbuf_a; 2990 redund->rbuf_j = rbuf_j; 2991 redund->rbuf_a = rbuf_a; 2992 redund->psubcomm = NULL; 2993 2994 redund->Destroy = C->ops->destroy; 2995 C->ops->destroy = MatDestroy_MatRedundant; 2996 } 2997 PetscFunctionReturn(0); 2998 } 2999 3000 #undef __FUNCT__ 3001 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 3002 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 3003 { 3004 PetscErrorCode ierr; 3005 3006 PetscFunctionBegin; 3007 if (subcomm == MPI_COMM_NULL || subcomm == PETSC_COMM_SELF) { /* create psubcomm */ 3008 MPI_Comm comm; 3009 PetscSubcomm psubcomm; 3010 PetscMPIInt size,subsize; 3011 3012 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3013 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3014 ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr); 3015 ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr); 3016 ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_INTERLACED);CHKERRQ(ierr); 3017 ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr); 3018 3019 ierr = MatGetRedundantMatrix_MPIAIJ_psubcomm(mat,nsubcomm,psubcomm,reuse,matredundant);CHKERRQ(ierr); 3020 3021 /* free psubcomm in MatDestroy_MatRedundant() */ 3022 ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr); 3023 if (subsize == 1) { 3024 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 3025 c->redundant->psubcomm = psubcomm; 3026 } else { 3027 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 3028 c->redundant->psubcomm = psubcomm ; 3029 } 3030 } else { 3031 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet"); 3032 } 3033 PetscFunctionReturn(0); 3034 } 3035 3036 #undef __FUNCT__ 3037 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 3038 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 3039 { 3040 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3041 PetscErrorCode ierr; 3042 PetscInt i,*idxb = 0; 3043 PetscScalar *va,*vb; 3044 Vec vtmp; 3045 3046 PetscFunctionBegin; 3047 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 3048 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 3049 if (idx) { 3050 for (i=0; i<A->rmap->n; i++) { 3051 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 3052 } 3053 } 3054 3055 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 3056 if (idx) { 3057 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 3058 } 3059 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 3060 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 3061 3062 for (i=0; i<A->rmap->n; i++) { 3063 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 3064 va[i] = vb[i]; 3065 if (idx) idx[i] = a->garray[idxb[i]]; 3066 } 3067 } 3068 3069 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 3070 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 3071 ierr = PetscFree(idxb);CHKERRQ(ierr); 3072 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 3073 PetscFunctionReturn(0); 3074 } 3075 3076 #undef __FUNCT__ 3077 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 3078 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 3079 { 3080 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3081 PetscErrorCode ierr; 3082 PetscInt i,*idxb = 0; 3083 PetscScalar *va,*vb; 3084 Vec vtmp; 3085 3086 PetscFunctionBegin; 3087 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 3088 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 3089 if (idx) { 3090 for (i=0; i<A->cmap->n; i++) { 3091 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 3092 } 3093 } 3094 3095 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 3096 if (idx) { 3097 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 3098 } 3099 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 3100 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 3101 3102 for (i=0; i<A->rmap->n; i++) { 3103 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 3104 va[i] = vb[i]; 3105 if (idx) idx[i] = a->garray[idxb[i]]; 3106 } 3107 } 3108 3109 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 3110 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 3111 ierr = PetscFree(idxb);CHKERRQ(ierr); 3112 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 3113 PetscFunctionReturn(0); 3114 } 3115 3116 #undef __FUNCT__ 3117 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 3118 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 3119 { 3120 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 3121 PetscInt n = A->rmap->n; 3122 PetscInt cstart = A->cmap->rstart; 3123 PetscInt *cmap = mat->garray; 3124 PetscInt *diagIdx, *offdiagIdx; 3125 Vec diagV, offdiagV; 3126 PetscScalar *a, *diagA, *offdiagA; 3127 PetscInt r; 3128 PetscErrorCode ierr; 3129 3130 PetscFunctionBegin; 3131 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 3132 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 3133 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 3134 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 3135 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 3136 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 3137 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 3138 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3139 for (r = 0; r < n; ++r) { 3140 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 3141 a[r] = diagA[r]; 3142 idx[r] = cstart + diagIdx[r]; 3143 } else { 3144 a[r] = offdiagA[r]; 3145 idx[r] = cmap[offdiagIdx[r]]; 3146 } 3147 } 3148 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3149 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3150 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3151 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3152 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3153 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3154 PetscFunctionReturn(0); 3155 } 3156 3157 #undef __FUNCT__ 3158 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 3159 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 3160 { 3161 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 3162 PetscInt n = A->rmap->n; 3163 PetscInt cstart = A->cmap->rstart; 3164 PetscInt *cmap = mat->garray; 3165 PetscInt *diagIdx, *offdiagIdx; 3166 Vec diagV, offdiagV; 3167 PetscScalar *a, *diagA, *offdiagA; 3168 PetscInt r; 3169 PetscErrorCode ierr; 3170 3171 PetscFunctionBegin; 3172 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 3173 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 3174 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 3175 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 3176 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 3177 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 3178 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 3179 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3180 for (r = 0; r < n; ++r) { 3181 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 3182 a[r] = diagA[r]; 3183 idx[r] = cstart + diagIdx[r]; 3184 } else { 3185 a[r] = offdiagA[r]; 3186 idx[r] = cmap[offdiagIdx[r]]; 3187 } 3188 } 3189 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3190 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3191 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3192 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3193 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3194 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3195 PetscFunctionReturn(0); 3196 } 3197 3198 #undef __FUNCT__ 3199 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 3200 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 3201 { 3202 PetscErrorCode ierr; 3203 Mat *dummy; 3204 3205 PetscFunctionBegin; 3206 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 3207 *newmat = *dummy; 3208 ierr = PetscFree(dummy);CHKERRQ(ierr); 3209 PetscFunctionReturn(0); 3210 } 3211 3212 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 3213 3214 #undef __FUNCT__ 3215 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3216 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3217 { 3218 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3219 PetscErrorCode ierr; 3220 3221 PetscFunctionBegin; 3222 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3223 PetscFunctionReturn(0); 3224 } 3225 3226 #undef __FUNCT__ 3227 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3228 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3229 { 3230 PetscErrorCode ierr; 3231 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3232 3233 PetscFunctionBegin; 3234 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3235 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3236 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3237 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3238 PetscFunctionReturn(0); 3239 } 3240 3241 /* -------------------------------------------------------------------*/ 3242 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3243 MatGetRow_MPIAIJ, 3244 MatRestoreRow_MPIAIJ, 3245 MatMult_MPIAIJ, 3246 /* 4*/ MatMultAdd_MPIAIJ, 3247 MatMultTranspose_MPIAIJ, 3248 MatMultTransposeAdd_MPIAIJ, 3249 #if defined(PETSC_HAVE_PBGL) 3250 MatSolve_MPIAIJ, 3251 #else 3252 0, 3253 #endif 3254 0, 3255 0, 3256 /*10*/ 0, 3257 0, 3258 0, 3259 MatSOR_MPIAIJ, 3260 MatTranspose_MPIAIJ, 3261 /*15*/ MatGetInfo_MPIAIJ, 3262 MatEqual_MPIAIJ, 3263 MatGetDiagonal_MPIAIJ, 3264 MatDiagonalScale_MPIAIJ, 3265 MatNorm_MPIAIJ, 3266 /*20*/ MatAssemblyBegin_MPIAIJ, 3267 MatAssemblyEnd_MPIAIJ, 3268 MatSetOption_MPIAIJ, 3269 MatZeroEntries_MPIAIJ, 3270 /*24*/ MatZeroRows_MPIAIJ, 3271 0, 3272 #if defined(PETSC_HAVE_PBGL) 3273 0, 3274 #else 3275 0, 3276 #endif 3277 0, 3278 0, 3279 /*29*/ MatSetUp_MPIAIJ, 3280 #if defined(PETSC_HAVE_PBGL) 3281 0, 3282 #else 3283 0, 3284 #endif 3285 0, 3286 0, 3287 0, 3288 /*34*/ MatDuplicate_MPIAIJ, 3289 0, 3290 0, 3291 0, 3292 0, 3293 /*39*/ MatAXPY_MPIAIJ, 3294 MatGetSubMatrices_MPIAIJ, 3295 MatIncreaseOverlap_MPIAIJ, 3296 MatGetValues_MPIAIJ, 3297 MatCopy_MPIAIJ, 3298 /*44*/ MatGetRowMax_MPIAIJ, 3299 MatScale_MPIAIJ, 3300 0, 3301 0, 3302 MatZeroRowsColumns_MPIAIJ, 3303 /*49*/ MatSetRandom_MPIAIJ, 3304 0, 3305 0, 3306 0, 3307 0, 3308 /*54*/ MatFDColoringCreate_MPIAIJ, 3309 0, 3310 MatSetUnfactored_MPIAIJ, 3311 MatPermute_MPIAIJ, 3312 0, 3313 /*59*/ MatGetSubMatrix_MPIAIJ, 3314 MatDestroy_MPIAIJ, 3315 MatView_MPIAIJ, 3316 0, 3317 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 3318 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 3319 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 3320 0, 3321 0, 3322 0, 3323 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3324 MatGetRowMinAbs_MPIAIJ, 3325 0, 3326 MatSetColoring_MPIAIJ, 3327 0, 3328 MatSetValuesAdifor_MPIAIJ, 3329 /*75*/ MatFDColoringApply_AIJ, 3330 0, 3331 0, 3332 0, 3333 MatFindZeroDiagonals_MPIAIJ, 3334 /*80*/ 0, 3335 0, 3336 0, 3337 /*83*/ MatLoad_MPIAIJ, 3338 0, 3339 0, 3340 0, 3341 0, 3342 0, 3343 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3344 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3345 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3346 MatPtAP_MPIAIJ_MPIAIJ, 3347 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3348 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 3349 0, 3350 0, 3351 0, 3352 0, 3353 /*99*/ 0, 3354 0, 3355 0, 3356 MatConjugate_MPIAIJ, 3357 0, 3358 /*104*/MatSetValuesRow_MPIAIJ, 3359 MatRealPart_MPIAIJ, 3360 MatImaginaryPart_MPIAIJ, 3361 0, 3362 0, 3363 /*109*/0, 3364 MatGetRedundantMatrix_MPIAIJ, 3365 MatGetRowMin_MPIAIJ, 3366 0, 3367 0, 3368 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3369 0, 3370 0, 3371 0, 3372 0, 3373 /*119*/0, 3374 0, 3375 0, 3376 0, 3377 MatGetMultiProcBlock_MPIAIJ, 3378 /*124*/MatFindNonzeroRows_MPIAIJ, 3379 MatGetColumnNorms_MPIAIJ, 3380 MatInvertBlockDiagonal_MPIAIJ, 3381 0, 3382 MatGetSubMatricesParallel_MPIAIJ, 3383 /*129*/0, 3384 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3385 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3386 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3387 0, 3388 /*134*/0, 3389 0, 3390 0, 3391 0, 3392 0, 3393 /*139*/0, 3394 0 3395 }; 3396 3397 /* ----------------------------------------------------------------------------------------*/ 3398 3399 #undef __FUNCT__ 3400 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3401 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3402 { 3403 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3404 PetscErrorCode ierr; 3405 3406 PetscFunctionBegin; 3407 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3408 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3409 PetscFunctionReturn(0); 3410 } 3411 3412 #undef __FUNCT__ 3413 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3414 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3415 { 3416 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3417 PetscErrorCode ierr; 3418 3419 PetscFunctionBegin; 3420 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3421 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3422 PetscFunctionReturn(0); 3423 } 3424 3425 #undef __FUNCT__ 3426 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3427 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3428 { 3429 Mat_MPIAIJ *b; 3430 PetscErrorCode ierr; 3431 3432 PetscFunctionBegin; 3433 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3434 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3435 b = (Mat_MPIAIJ*)B->data; 3436 3437 if (!B->preallocated) { 3438 /* Explicitly create 2 MATSEQAIJ matrices. */ 3439 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3440 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3441 ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3442 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3443 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3444 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3445 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3446 ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3447 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3448 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3449 } 3450 3451 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3452 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3453 B->preallocated = PETSC_TRUE; 3454 PetscFunctionReturn(0); 3455 } 3456 3457 #undef __FUNCT__ 3458 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3459 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3460 { 3461 Mat mat; 3462 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3463 PetscErrorCode ierr; 3464 3465 PetscFunctionBegin; 3466 *newmat = 0; 3467 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3468 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3469 ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr); 3470 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3471 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3472 a = (Mat_MPIAIJ*)mat->data; 3473 3474 mat->factortype = matin->factortype; 3475 mat->rmap->bs = matin->rmap->bs; 3476 mat->cmap->bs = matin->cmap->bs; 3477 mat->assembled = PETSC_TRUE; 3478 mat->insertmode = NOT_SET_VALUES; 3479 mat->preallocated = PETSC_TRUE; 3480 3481 a->size = oldmat->size; 3482 a->rank = oldmat->rank; 3483 a->donotstash = oldmat->donotstash; 3484 a->roworiented = oldmat->roworiented; 3485 a->rowindices = 0; 3486 a->rowvalues = 0; 3487 a->getrowactive = PETSC_FALSE; 3488 3489 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3490 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3491 3492 if (oldmat->colmap) { 3493 #if defined(PETSC_USE_CTABLE) 3494 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3495 #else 3496 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3497 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3498 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3499 #endif 3500 } else a->colmap = 0; 3501 if (oldmat->garray) { 3502 PetscInt len; 3503 len = oldmat->B->cmap->n; 3504 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3505 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3506 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3507 } else a->garray = 0; 3508 3509 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3510 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3511 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3512 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3513 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3514 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3515 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3516 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3517 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3518 *newmat = mat; 3519 PetscFunctionReturn(0); 3520 } 3521 3522 3523 3524 #undef __FUNCT__ 3525 #define __FUNCT__ "MatLoad_MPIAIJ" 3526 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3527 { 3528 PetscScalar *vals,*svals; 3529 MPI_Comm comm; 3530 PetscErrorCode ierr; 3531 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3532 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3533 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3534 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 3535 PetscInt cend,cstart,n,*rowners,sizesset=1; 3536 int fd; 3537 PetscInt bs = 1; 3538 3539 PetscFunctionBegin; 3540 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3541 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3542 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3543 if (!rank) { 3544 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3545 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3546 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3547 } 3548 3549 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3550 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3551 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3552 3553 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3554 3555 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3556 M = header[1]; N = header[2]; 3557 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3558 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3559 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3560 3561 /* If global sizes are set, check if they are consistent with that given in the file */ 3562 if (sizesset) { 3563 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3564 } 3565 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3566 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3567 3568 /* determine ownership of all (block) rows */ 3569 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3570 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3571 else m = newMat->rmap->n; /* Set by user */ 3572 3573 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3574 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3575 3576 /* First process needs enough room for process with most rows */ 3577 if (!rank) { 3578 mmax = rowners[1]; 3579 for (i=2; i<=size; i++) { 3580 mmax = PetscMax(mmax, rowners[i]); 3581 } 3582 } else mmax = -1; /* unused, but compilers complain */ 3583 3584 rowners[0] = 0; 3585 for (i=2; i<=size; i++) { 3586 rowners[i] += rowners[i-1]; 3587 } 3588 rstart = rowners[rank]; 3589 rend = rowners[rank+1]; 3590 3591 /* distribute row lengths to all processors */ 3592 ierr = PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);CHKERRQ(ierr); 3593 if (!rank) { 3594 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3595 ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3596 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3597 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3598 for (j=0; j<m; j++) { 3599 procsnz[0] += ourlens[j]; 3600 } 3601 for (i=1; i<size; i++) { 3602 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3603 /* calculate the number of nonzeros on each processor */ 3604 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3605 procsnz[i] += rowlengths[j]; 3606 } 3607 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3608 } 3609 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3610 } else { 3611 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3612 } 3613 3614 if (!rank) { 3615 /* determine max buffer needed and allocate it */ 3616 maxnz = 0; 3617 for (i=0; i<size; i++) { 3618 maxnz = PetscMax(maxnz,procsnz[i]); 3619 } 3620 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3621 3622 /* read in my part of the matrix column indices */ 3623 nz = procsnz[0]; 3624 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3625 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3626 3627 /* read in every one elses and ship off */ 3628 for (i=1; i<size; i++) { 3629 nz = procsnz[i]; 3630 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3631 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3632 } 3633 ierr = PetscFree(cols);CHKERRQ(ierr); 3634 } else { 3635 /* determine buffer space needed for message */ 3636 nz = 0; 3637 for (i=0; i<m; i++) { 3638 nz += ourlens[i]; 3639 } 3640 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3641 3642 /* receive message of column indices*/ 3643 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3644 } 3645 3646 /* determine column ownership if matrix is not square */ 3647 if (N != M) { 3648 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3649 else n = newMat->cmap->n; 3650 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3651 cstart = cend - n; 3652 } else { 3653 cstart = rstart; 3654 cend = rend; 3655 n = cend - cstart; 3656 } 3657 3658 /* loop over local rows, determining number of off diagonal entries */ 3659 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3660 jj = 0; 3661 for (i=0; i<m; i++) { 3662 for (j=0; j<ourlens[i]; j++) { 3663 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3664 jj++; 3665 } 3666 } 3667 3668 for (i=0; i<m; i++) { 3669 ourlens[i] -= offlens[i]; 3670 } 3671 if (!sizesset) { 3672 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3673 } 3674 3675 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3676 3677 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3678 3679 for (i=0; i<m; i++) { 3680 ourlens[i] += offlens[i]; 3681 } 3682 3683 if (!rank) { 3684 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3685 3686 /* read in my part of the matrix numerical values */ 3687 nz = procsnz[0]; 3688 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3689 3690 /* insert into matrix */ 3691 jj = rstart; 3692 smycols = mycols; 3693 svals = vals; 3694 for (i=0; i<m; i++) { 3695 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3696 smycols += ourlens[i]; 3697 svals += ourlens[i]; 3698 jj++; 3699 } 3700 3701 /* read in other processors and ship out */ 3702 for (i=1; i<size; i++) { 3703 nz = procsnz[i]; 3704 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3705 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3706 } 3707 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3708 } else { 3709 /* receive numeric values */ 3710 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3711 3712 /* receive message of values*/ 3713 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3714 3715 /* insert into matrix */ 3716 jj = rstart; 3717 smycols = mycols; 3718 svals = vals; 3719 for (i=0; i<m; i++) { 3720 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3721 smycols += ourlens[i]; 3722 svals += ourlens[i]; 3723 jj++; 3724 } 3725 } 3726 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3727 ierr = PetscFree(vals);CHKERRQ(ierr); 3728 ierr = PetscFree(mycols);CHKERRQ(ierr); 3729 ierr = PetscFree(rowners);CHKERRQ(ierr); 3730 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3731 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3732 PetscFunctionReturn(0); 3733 } 3734 3735 #undef __FUNCT__ 3736 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3737 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3738 { 3739 PetscErrorCode ierr; 3740 IS iscol_local; 3741 PetscInt csize; 3742 3743 PetscFunctionBegin; 3744 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3745 if (call == MAT_REUSE_MATRIX) { 3746 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3747 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3748 } else { 3749 PetscInt cbs; 3750 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3751 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3752 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3753 } 3754 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3755 if (call == MAT_INITIAL_MATRIX) { 3756 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3757 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3758 } 3759 PetscFunctionReturn(0); 3760 } 3761 3762 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3763 #undef __FUNCT__ 3764 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3765 /* 3766 Not great since it makes two copies of the submatrix, first an SeqAIJ 3767 in local and then by concatenating the local matrices the end result. 3768 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3769 3770 Note: This requires a sequential iscol with all indices. 3771 */ 3772 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3773 { 3774 PetscErrorCode ierr; 3775 PetscMPIInt rank,size; 3776 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3777 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3778 PetscBool allcolumns, colflag; 3779 Mat M,Mreuse; 3780 MatScalar *vwork,*aa; 3781 MPI_Comm comm; 3782 Mat_SeqAIJ *aij; 3783 3784 PetscFunctionBegin; 3785 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3786 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3787 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3788 3789 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3790 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3791 if (colflag && ncol == mat->cmap->N) { 3792 allcolumns = PETSC_TRUE; 3793 } else { 3794 allcolumns = PETSC_FALSE; 3795 } 3796 if (call == MAT_REUSE_MATRIX) { 3797 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3798 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3799 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3800 } else { 3801 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3802 } 3803 3804 /* 3805 m - number of local rows 3806 n - number of columns (same on all processors) 3807 rstart - first row in new global matrix generated 3808 */ 3809 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3810 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3811 if (call == MAT_INITIAL_MATRIX) { 3812 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3813 ii = aij->i; 3814 jj = aij->j; 3815 3816 /* 3817 Determine the number of non-zeros in the diagonal and off-diagonal 3818 portions of the matrix in order to do correct preallocation 3819 */ 3820 3821 /* first get start and end of "diagonal" columns */ 3822 if (csize == PETSC_DECIDE) { 3823 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3824 if (mglobal == n) { /* square matrix */ 3825 nlocal = m; 3826 } else { 3827 nlocal = n/size + ((n % size) > rank); 3828 } 3829 } else { 3830 nlocal = csize; 3831 } 3832 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3833 rstart = rend - nlocal; 3834 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3835 3836 /* next, compute all the lengths */ 3837 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3838 olens = dlens + m; 3839 for (i=0; i<m; i++) { 3840 jend = ii[i+1] - ii[i]; 3841 olen = 0; 3842 dlen = 0; 3843 for (j=0; j<jend; j++) { 3844 if (*jj < rstart || *jj >= rend) olen++; 3845 else dlen++; 3846 jj++; 3847 } 3848 olens[i] = olen; 3849 dlens[i] = dlen; 3850 } 3851 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3852 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3853 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3854 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3855 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3856 ierr = PetscFree(dlens);CHKERRQ(ierr); 3857 } else { 3858 PetscInt ml,nl; 3859 3860 M = *newmat; 3861 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3862 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3863 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3864 /* 3865 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3866 rather than the slower MatSetValues(). 3867 */ 3868 M->was_assembled = PETSC_TRUE; 3869 M->assembled = PETSC_FALSE; 3870 } 3871 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3872 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3873 ii = aij->i; 3874 jj = aij->j; 3875 aa = aij->a; 3876 for (i=0; i<m; i++) { 3877 row = rstart + i; 3878 nz = ii[i+1] - ii[i]; 3879 cwork = jj; jj += nz; 3880 vwork = aa; aa += nz; 3881 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3882 } 3883 3884 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3885 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3886 *newmat = M; 3887 3888 /* save submatrix used in processor for next request */ 3889 if (call == MAT_INITIAL_MATRIX) { 3890 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3891 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3892 } 3893 PetscFunctionReturn(0); 3894 } 3895 3896 #undef __FUNCT__ 3897 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3898 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3899 { 3900 PetscInt m,cstart, cend,j,nnz,i,d; 3901 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3902 const PetscInt *JJ; 3903 PetscScalar *values; 3904 PetscErrorCode ierr; 3905 3906 PetscFunctionBegin; 3907 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3908 3909 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3910 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3911 m = B->rmap->n; 3912 cstart = B->cmap->rstart; 3913 cend = B->cmap->rend; 3914 rstart = B->rmap->rstart; 3915 3916 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3917 3918 #if defined(PETSC_USE_DEBUGGING) 3919 for (i=0; i<m; i++) { 3920 nnz = Ii[i+1]- Ii[i]; 3921 JJ = J + Ii[i]; 3922 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3923 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 3924 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3925 } 3926 #endif 3927 3928 for (i=0; i<m; i++) { 3929 nnz = Ii[i+1]- Ii[i]; 3930 JJ = J + Ii[i]; 3931 nnz_max = PetscMax(nnz_max,nnz); 3932 d = 0; 3933 for (j=0; j<nnz; j++) { 3934 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3935 } 3936 d_nnz[i] = d; 3937 o_nnz[i] = nnz - d; 3938 } 3939 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3940 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3941 3942 if (v) values = (PetscScalar*)v; 3943 else { 3944 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3945 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3946 } 3947 3948 for (i=0; i<m; i++) { 3949 ii = i + rstart; 3950 nnz = Ii[i+1]- Ii[i]; 3951 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3952 } 3953 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3954 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3955 3956 if (!v) { 3957 ierr = PetscFree(values);CHKERRQ(ierr); 3958 } 3959 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3960 PetscFunctionReturn(0); 3961 } 3962 3963 #undef __FUNCT__ 3964 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3965 /*@ 3966 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3967 (the default parallel PETSc format). 3968 3969 Collective on MPI_Comm 3970 3971 Input Parameters: 3972 + B - the matrix 3973 . i - the indices into j for the start of each local row (starts with zero) 3974 . j - the column indices for each local row (starts with zero) 3975 - v - optional values in the matrix 3976 3977 Level: developer 3978 3979 Notes: 3980 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3981 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3982 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3983 3984 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3985 3986 The format which is used for the sparse matrix input, is equivalent to a 3987 row-major ordering.. i.e for the following matrix, the input data expected is 3988 as shown: 3989 3990 1 0 0 3991 2 0 3 P0 3992 ------- 3993 4 5 6 P1 3994 3995 Process0 [P0]: rows_owned=[0,1] 3996 i = {0,1,3} [size = nrow+1 = 2+1] 3997 j = {0,0,2} [size = nz = 6] 3998 v = {1,2,3} [size = nz = 6] 3999 4000 Process1 [P1]: rows_owned=[2] 4001 i = {0,3} [size = nrow+1 = 1+1] 4002 j = {0,1,2} [size = nz = 6] 4003 v = {4,5,6} [size = nz = 6] 4004 4005 .keywords: matrix, aij, compressed row, sparse, parallel 4006 4007 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 4008 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 4009 @*/ 4010 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 4011 { 4012 PetscErrorCode ierr; 4013 4014 PetscFunctionBegin; 4015 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 4016 PetscFunctionReturn(0); 4017 } 4018 4019 #undef __FUNCT__ 4020 #define __FUNCT__ "MatMPIAIJSetPreallocation" 4021 /*@C 4022 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 4023 (the default parallel PETSc format). For good matrix assembly performance 4024 the user should preallocate the matrix storage by setting the parameters 4025 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4026 performance can be increased by more than a factor of 50. 4027 4028 Collective on MPI_Comm 4029 4030 Input Parameters: 4031 + A - the matrix 4032 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4033 (same value is used for all local rows) 4034 . d_nnz - array containing the number of nonzeros in the various rows of the 4035 DIAGONAL portion of the local submatrix (possibly different for each row) 4036 or NULL, if d_nz is used to specify the nonzero structure. 4037 The size of this array is equal to the number of local rows, i.e 'm'. 4038 For matrices that will be factored, you must leave room for (and set) 4039 the diagonal entry even if it is zero. 4040 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4041 submatrix (same value is used for all local rows). 4042 - o_nnz - array containing the number of nonzeros in the various rows of the 4043 OFF-DIAGONAL portion of the local submatrix (possibly different for 4044 each row) or NULL, if o_nz is used to specify the nonzero 4045 structure. The size of this array is equal to the number 4046 of local rows, i.e 'm'. 4047 4048 If the *_nnz parameter is given then the *_nz parameter is ignored 4049 4050 The AIJ format (also called the Yale sparse matrix format or 4051 compressed row storage (CSR)), is fully compatible with standard Fortran 77 4052 storage. The stored row and column indices begin with zero. 4053 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 4054 4055 The parallel matrix is partitioned such that the first m0 rows belong to 4056 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4057 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 4058 4059 The DIAGONAL portion of the local submatrix of a processor can be defined 4060 as the submatrix which is obtained by extraction the part corresponding to 4061 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4062 first row that belongs to the processor, r2 is the last row belonging to 4063 the this processor, and c1-c2 is range of indices of the local part of a 4064 vector suitable for applying the matrix to. This is an mxn matrix. In the 4065 common case of a square matrix, the row and column ranges are the same and 4066 the DIAGONAL part is also square. The remaining portion of the local 4067 submatrix (mxN) constitute the OFF-DIAGONAL portion. 4068 4069 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4070 4071 You can call MatGetInfo() to get information on how effective the preallocation was; 4072 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 4073 You can also run with the option -info and look for messages with the string 4074 malloc in them to see if additional memory allocation was needed. 4075 4076 Example usage: 4077 4078 Consider the following 8x8 matrix with 34 non-zero values, that is 4079 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4080 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4081 as follows: 4082 4083 .vb 4084 1 2 0 | 0 3 0 | 0 4 4085 Proc0 0 5 6 | 7 0 0 | 8 0 4086 9 0 10 | 11 0 0 | 12 0 4087 ------------------------------------- 4088 13 0 14 | 15 16 17 | 0 0 4089 Proc1 0 18 0 | 19 20 21 | 0 0 4090 0 0 0 | 22 23 0 | 24 0 4091 ------------------------------------- 4092 Proc2 25 26 27 | 0 0 28 | 29 0 4093 30 0 0 | 31 32 33 | 0 34 4094 .ve 4095 4096 This can be represented as a collection of submatrices as: 4097 4098 .vb 4099 A B C 4100 D E F 4101 G H I 4102 .ve 4103 4104 Where the submatrices A,B,C are owned by proc0, D,E,F are 4105 owned by proc1, G,H,I are owned by proc2. 4106 4107 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4108 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4109 The 'M','N' parameters are 8,8, and have the same values on all procs. 4110 4111 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4112 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4113 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4114 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4115 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4116 matrix, ans [DF] as another SeqAIJ matrix. 4117 4118 When d_nz, o_nz parameters are specified, d_nz storage elements are 4119 allocated for every row of the local diagonal submatrix, and o_nz 4120 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4121 One way to choose d_nz and o_nz is to use the max nonzerors per local 4122 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4123 In this case, the values of d_nz,o_nz are: 4124 .vb 4125 proc0 : dnz = 2, o_nz = 2 4126 proc1 : dnz = 3, o_nz = 2 4127 proc2 : dnz = 1, o_nz = 4 4128 .ve 4129 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4130 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4131 for proc3. i.e we are using 12+15+10=37 storage locations to store 4132 34 values. 4133 4134 When d_nnz, o_nnz parameters are specified, the storage is specified 4135 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4136 In the above case the values for d_nnz,o_nnz are: 4137 .vb 4138 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4139 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4140 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4141 .ve 4142 Here the space allocated is sum of all the above values i.e 34, and 4143 hence pre-allocation is perfect. 4144 4145 Level: intermediate 4146 4147 .keywords: matrix, aij, compressed row, sparse, parallel 4148 4149 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4150 MPIAIJ, MatGetInfo(), PetscSplitOwnership() 4151 @*/ 4152 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4153 { 4154 PetscErrorCode ierr; 4155 4156 PetscFunctionBegin; 4157 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4158 PetscValidType(B,1); 4159 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4160 PetscFunctionReturn(0); 4161 } 4162 4163 #undef __FUNCT__ 4164 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 4165 /*@ 4166 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 4167 CSR format the local rows. 4168 4169 Collective on MPI_Comm 4170 4171 Input Parameters: 4172 + comm - MPI communicator 4173 . m - number of local rows (Cannot be PETSC_DECIDE) 4174 . n - This value should be the same as the local size used in creating the 4175 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4176 calculated if N is given) For square matrices n is almost always m. 4177 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4178 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4179 . i - row indices 4180 . j - column indices 4181 - a - matrix values 4182 4183 Output Parameter: 4184 . mat - the matrix 4185 4186 Level: intermediate 4187 4188 Notes: 4189 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4190 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4191 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4192 4193 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4194 4195 The format which is used for the sparse matrix input, is equivalent to a 4196 row-major ordering.. i.e for the following matrix, the input data expected is 4197 as shown: 4198 4199 1 0 0 4200 2 0 3 P0 4201 ------- 4202 4 5 6 P1 4203 4204 Process0 [P0]: rows_owned=[0,1] 4205 i = {0,1,3} [size = nrow+1 = 2+1] 4206 j = {0,0,2} [size = nz = 6] 4207 v = {1,2,3} [size = nz = 6] 4208 4209 Process1 [P1]: rows_owned=[2] 4210 i = {0,3} [size = nrow+1 = 1+1] 4211 j = {0,1,2} [size = nz = 6] 4212 v = {4,5,6} [size = nz = 6] 4213 4214 .keywords: matrix, aij, compressed row, sparse, parallel 4215 4216 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4217 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4218 @*/ 4219 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4220 { 4221 PetscErrorCode ierr; 4222 4223 PetscFunctionBegin; 4224 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4225 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4226 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4227 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4228 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4229 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4230 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4231 PetscFunctionReturn(0); 4232 } 4233 4234 #undef __FUNCT__ 4235 #define __FUNCT__ "MatCreateAIJ" 4236 /*@C 4237 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4238 (the default parallel PETSc format). For good matrix assembly performance 4239 the user should preallocate the matrix storage by setting the parameters 4240 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4241 performance can be increased by more than a factor of 50. 4242 4243 Collective on MPI_Comm 4244 4245 Input Parameters: 4246 + comm - MPI communicator 4247 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4248 This value should be the same as the local size used in creating the 4249 y vector for the matrix-vector product y = Ax. 4250 . n - This value should be the same as the local size used in creating the 4251 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4252 calculated if N is given) For square matrices n is almost always m. 4253 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4254 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4255 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4256 (same value is used for all local rows) 4257 . d_nnz - array containing the number of nonzeros in the various rows of the 4258 DIAGONAL portion of the local submatrix (possibly different for each row) 4259 or NULL, if d_nz is used to specify the nonzero structure. 4260 The size of this array is equal to the number of local rows, i.e 'm'. 4261 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4262 submatrix (same value is used for all local rows). 4263 - o_nnz - array containing the number of nonzeros in the various rows of the 4264 OFF-DIAGONAL portion of the local submatrix (possibly different for 4265 each row) or NULL, if o_nz is used to specify the nonzero 4266 structure. The size of this array is equal to the number 4267 of local rows, i.e 'm'. 4268 4269 Output Parameter: 4270 . A - the matrix 4271 4272 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4273 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4274 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4275 4276 Notes: 4277 If the *_nnz parameter is given then the *_nz parameter is ignored 4278 4279 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4280 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4281 storage requirements for this matrix. 4282 4283 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4284 processor than it must be used on all processors that share the object for 4285 that argument. 4286 4287 The user MUST specify either the local or global matrix dimensions 4288 (possibly both). 4289 4290 The parallel matrix is partitioned across processors such that the 4291 first m0 rows belong to process 0, the next m1 rows belong to 4292 process 1, the next m2 rows belong to process 2 etc.. where 4293 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4294 values corresponding to [m x N] submatrix. 4295 4296 The columns are logically partitioned with the n0 columns belonging 4297 to 0th partition, the next n1 columns belonging to the next 4298 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 4299 4300 The DIAGONAL portion of the local submatrix on any given processor 4301 is the submatrix corresponding to the rows and columns m,n 4302 corresponding to the given processor. i.e diagonal matrix on 4303 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4304 etc. The remaining portion of the local submatrix [m x (N-n)] 4305 constitute the OFF-DIAGONAL portion. The example below better 4306 illustrates this concept. 4307 4308 For a square global matrix we define each processor's diagonal portion 4309 to be its local rows and the corresponding columns (a square submatrix); 4310 each processor's off-diagonal portion encompasses the remainder of the 4311 local matrix (a rectangular submatrix). 4312 4313 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4314 4315 When calling this routine with a single process communicator, a matrix of 4316 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4317 type of communicator, use the construction mechanism: 4318 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4319 4320 By default, this format uses inodes (identical nodes) when possible. 4321 We search for consecutive rows with the same nonzero structure, thereby 4322 reusing matrix information to achieve increased efficiency. 4323 4324 Options Database Keys: 4325 + -mat_no_inode - Do not use inodes 4326 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4327 - -mat_aij_oneindex - Internally use indexing starting at 1 4328 rather than 0. Note that when calling MatSetValues(), 4329 the user still MUST index entries starting at 0! 4330 4331 4332 Example usage: 4333 4334 Consider the following 8x8 matrix with 34 non-zero values, that is 4335 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4336 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4337 as follows: 4338 4339 .vb 4340 1 2 0 | 0 3 0 | 0 4 4341 Proc0 0 5 6 | 7 0 0 | 8 0 4342 9 0 10 | 11 0 0 | 12 0 4343 ------------------------------------- 4344 13 0 14 | 15 16 17 | 0 0 4345 Proc1 0 18 0 | 19 20 21 | 0 0 4346 0 0 0 | 22 23 0 | 24 0 4347 ------------------------------------- 4348 Proc2 25 26 27 | 0 0 28 | 29 0 4349 30 0 0 | 31 32 33 | 0 34 4350 .ve 4351 4352 This can be represented as a collection of submatrices as: 4353 4354 .vb 4355 A B C 4356 D E F 4357 G H I 4358 .ve 4359 4360 Where the submatrices A,B,C are owned by proc0, D,E,F are 4361 owned by proc1, G,H,I are owned by proc2. 4362 4363 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4364 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4365 The 'M','N' parameters are 8,8, and have the same values on all procs. 4366 4367 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4368 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4369 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4370 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4371 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4372 matrix, ans [DF] as another SeqAIJ matrix. 4373 4374 When d_nz, o_nz parameters are specified, d_nz storage elements are 4375 allocated for every row of the local diagonal submatrix, and o_nz 4376 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4377 One way to choose d_nz and o_nz is to use the max nonzerors per local 4378 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4379 In this case, the values of d_nz,o_nz are: 4380 .vb 4381 proc0 : dnz = 2, o_nz = 2 4382 proc1 : dnz = 3, o_nz = 2 4383 proc2 : dnz = 1, o_nz = 4 4384 .ve 4385 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4386 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4387 for proc3. i.e we are using 12+15+10=37 storage locations to store 4388 34 values. 4389 4390 When d_nnz, o_nnz parameters are specified, the storage is specified 4391 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4392 In the above case the values for d_nnz,o_nnz are: 4393 .vb 4394 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4395 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4396 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4397 .ve 4398 Here the space allocated is sum of all the above values i.e 34, and 4399 hence pre-allocation is perfect. 4400 4401 Level: intermediate 4402 4403 .keywords: matrix, aij, compressed row, sparse, parallel 4404 4405 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4406 MPIAIJ, MatCreateMPIAIJWithArrays() 4407 @*/ 4408 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4409 { 4410 PetscErrorCode ierr; 4411 PetscMPIInt size; 4412 4413 PetscFunctionBegin; 4414 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4415 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4416 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4417 if (size > 1) { 4418 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4419 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4420 } else { 4421 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4422 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4423 } 4424 PetscFunctionReturn(0); 4425 } 4426 4427 #undef __FUNCT__ 4428 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4429 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4430 { 4431 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4432 4433 PetscFunctionBegin; 4434 *Ad = a->A; 4435 *Ao = a->B; 4436 *colmap = a->garray; 4437 PetscFunctionReturn(0); 4438 } 4439 4440 #undef __FUNCT__ 4441 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4442 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4443 { 4444 PetscErrorCode ierr; 4445 PetscInt i; 4446 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4447 4448 PetscFunctionBegin; 4449 if (coloring->ctype == IS_COLORING_GLOBAL) { 4450 ISColoringValue *allcolors,*colors; 4451 ISColoring ocoloring; 4452 4453 /* set coloring for diagonal portion */ 4454 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4455 4456 /* set coloring for off-diagonal portion */ 4457 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 4458 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4459 for (i=0; i<a->B->cmap->n; i++) { 4460 colors[i] = allcolors[a->garray[i]]; 4461 } 4462 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4463 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4464 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4465 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4466 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4467 ISColoringValue *colors; 4468 PetscInt *larray; 4469 ISColoring ocoloring; 4470 4471 /* set coloring for diagonal portion */ 4472 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4473 for (i=0; i<a->A->cmap->n; i++) { 4474 larray[i] = i + A->cmap->rstart; 4475 } 4476 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 4477 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4478 for (i=0; i<a->A->cmap->n; i++) { 4479 colors[i] = coloring->colors[larray[i]]; 4480 } 4481 ierr = PetscFree(larray);CHKERRQ(ierr); 4482 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4483 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4484 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4485 4486 /* set coloring for off-diagonal portion */ 4487 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4488 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 4489 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4490 for (i=0; i<a->B->cmap->n; i++) { 4491 colors[i] = coloring->colors[larray[i]]; 4492 } 4493 ierr = PetscFree(larray);CHKERRQ(ierr); 4494 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4495 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4496 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4497 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4498 PetscFunctionReturn(0); 4499 } 4500 4501 #undef __FUNCT__ 4502 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4503 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4504 { 4505 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4506 PetscErrorCode ierr; 4507 4508 PetscFunctionBegin; 4509 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4510 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4511 PetscFunctionReturn(0); 4512 } 4513 4514 #undef __FUNCT__ 4515 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4516 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4517 { 4518 PetscErrorCode ierr; 4519 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4520 PetscInt *indx; 4521 4522 PetscFunctionBegin; 4523 /* This routine will ONLY return MPIAIJ type matrix */ 4524 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4525 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4526 if (n == PETSC_DECIDE) { 4527 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4528 } 4529 /* Check sum(n) = N */ 4530 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4531 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4532 4533 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4534 rstart -= m; 4535 4536 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4537 for (i=0; i<m; i++) { 4538 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4539 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4540 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4541 } 4542 4543 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4544 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4545 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4546 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4547 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4548 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4549 PetscFunctionReturn(0); 4550 } 4551 4552 #undef __FUNCT__ 4553 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4554 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4555 { 4556 PetscErrorCode ierr; 4557 PetscInt m,N,i,rstart,nnz,Ii; 4558 PetscInt *indx; 4559 PetscScalar *values; 4560 4561 PetscFunctionBegin; 4562 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4563 ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr); 4564 for (i=0; i<m; i++) { 4565 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4566 Ii = i + rstart; 4567 ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4568 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4569 } 4570 ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4571 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4572 PetscFunctionReturn(0); 4573 } 4574 4575 #undef __FUNCT__ 4576 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4577 /*@ 4578 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4579 matrices from each processor 4580 4581 Collective on MPI_Comm 4582 4583 Input Parameters: 4584 + comm - the communicators the parallel matrix will live on 4585 . inmat - the input sequential matrices 4586 . n - number of local columns (or PETSC_DECIDE) 4587 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4588 4589 Output Parameter: 4590 . outmat - the parallel matrix generated 4591 4592 Level: advanced 4593 4594 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4595 4596 @*/ 4597 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4598 { 4599 PetscErrorCode ierr; 4600 4601 PetscFunctionBegin; 4602 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4603 if (scall == MAT_INITIAL_MATRIX) { 4604 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4605 } 4606 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4607 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4608 PetscFunctionReturn(0); 4609 } 4610 4611 #undef __FUNCT__ 4612 #define __FUNCT__ "MatFileSplit" 4613 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4614 { 4615 PetscErrorCode ierr; 4616 PetscMPIInt rank; 4617 PetscInt m,N,i,rstart,nnz; 4618 size_t len; 4619 const PetscInt *indx; 4620 PetscViewer out; 4621 char *name; 4622 Mat B; 4623 const PetscScalar *values; 4624 4625 PetscFunctionBegin; 4626 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4627 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4628 /* Should this be the type of the diagonal block of A? */ 4629 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4630 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4631 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 4632 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4633 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4634 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4635 for (i=0; i<m; i++) { 4636 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4637 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4638 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4639 } 4640 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4641 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4642 4643 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4644 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4645 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4646 sprintf(name,"%s.%d",outfile,rank); 4647 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4648 ierr = PetscFree(name);CHKERRQ(ierr); 4649 ierr = MatView(B,out);CHKERRQ(ierr); 4650 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4651 ierr = MatDestroy(&B);CHKERRQ(ierr); 4652 PetscFunctionReturn(0); 4653 } 4654 4655 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4656 #undef __FUNCT__ 4657 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4658 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4659 { 4660 PetscErrorCode ierr; 4661 Mat_Merge_SeqsToMPI *merge; 4662 PetscContainer container; 4663 4664 PetscFunctionBegin; 4665 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4666 if (container) { 4667 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4668 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4669 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4670 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4671 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4672 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4673 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4674 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4675 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4676 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4677 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4678 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4679 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4680 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4681 ierr = PetscFree(merge);CHKERRQ(ierr); 4682 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4683 } 4684 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4685 PetscFunctionReturn(0); 4686 } 4687 4688 #include <../src/mat/utils/freespace.h> 4689 #include <petscbt.h> 4690 4691 #undef __FUNCT__ 4692 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4693 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4694 { 4695 PetscErrorCode ierr; 4696 MPI_Comm comm; 4697 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4698 PetscMPIInt size,rank,taga,*len_s; 4699 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4700 PetscInt proc,m; 4701 PetscInt **buf_ri,**buf_rj; 4702 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4703 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4704 MPI_Request *s_waits,*r_waits; 4705 MPI_Status *status; 4706 MatScalar *aa=a->a; 4707 MatScalar **abuf_r,*ba_i; 4708 Mat_Merge_SeqsToMPI *merge; 4709 PetscContainer container; 4710 4711 PetscFunctionBegin; 4712 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4713 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4714 4715 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4716 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4717 4718 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4719 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4720 4721 bi = merge->bi; 4722 bj = merge->bj; 4723 buf_ri = merge->buf_ri; 4724 buf_rj = merge->buf_rj; 4725 4726 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4727 owners = merge->rowmap->range; 4728 len_s = merge->len_s; 4729 4730 /* send and recv matrix values */ 4731 /*-----------------------------*/ 4732 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4733 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4734 4735 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4736 for (proc=0,k=0; proc<size; proc++) { 4737 if (!len_s[proc]) continue; 4738 i = owners[proc]; 4739 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4740 k++; 4741 } 4742 4743 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4744 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4745 ierr = PetscFree(status);CHKERRQ(ierr); 4746 4747 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4748 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4749 4750 /* insert mat values of mpimat */ 4751 /*----------------------------*/ 4752 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4753 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4754 4755 for (k=0; k<merge->nrecv; k++) { 4756 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4757 nrows = *(buf_ri_k[k]); 4758 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4759 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4760 } 4761 4762 /* set values of ba */ 4763 m = merge->rowmap->n; 4764 for (i=0; i<m; i++) { 4765 arow = owners[rank] + i; 4766 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4767 bnzi = bi[i+1] - bi[i]; 4768 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4769 4770 /* add local non-zero vals of this proc's seqmat into ba */ 4771 anzi = ai[arow+1] - ai[arow]; 4772 aj = a->j + ai[arow]; 4773 aa = a->a + ai[arow]; 4774 nextaj = 0; 4775 for (j=0; nextaj<anzi; j++) { 4776 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4777 ba_i[j] += aa[nextaj++]; 4778 } 4779 } 4780 4781 /* add received vals into ba */ 4782 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4783 /* i-th row */ 4784 if (i == *nextrow[k]) { 4785 anzi = *(nextai[k]+1) - *nextai[k]; 4786 aj = buf_rj[k] + *(nextai[k]); 4787 aa = abuf_r[k] + *(nextai[k]); 4788 nextaj = 0; 4789 for (j=0; nextaj<anzi; j++) { 4790 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4791 ba_i[j] += aa[nextaj++]; 4792 } 4793 } 4794 nextrow[k]++; nextai[k]++; 4795 } 4796 } 4797 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4798 } 4799 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4800 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4801 4802 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4803 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4804 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4805 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4806 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4807 PetscFunctionReturn(0); 4808 } 4809 4810 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4811 4812 #undef __FUNCT__ 4813 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4814 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4815 { 4816 PetscErrorCode ierr; 4817 Mat B_mpi; 4818 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4819 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4820 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4821 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4822 PetscInt len,proc,*dnz,*onz,bs,cbs; 4823 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4824 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4825 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4826 MPI_Status *status; 4827 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4828 PetscBT lnkbt; 4829 Mat_Merge_SeqsToMPI *merge; 4830 PetscContainer container; 4831 4832 PetscFunctionBegin; 4833 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4834 4835 /* make sure it is a PETSc comm */ 4836 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4837 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4838 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4839 4840 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4841 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4842 4843 /* determine row ownership */ 4844 /*---------------------------------------------------------*/ 4845 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4846 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4847 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4848 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4849 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4850 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4851 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4852 4853 m = merge->rowmap->n; 4854 owners = merge->rowmap->range; 4855 4856 /* determine the number of messages to send, their lengths */ 4857 /*---------------------------------------------------------*/ 4858 len_s = merge->len_s; 4859 4860 len = 0; /* length of buf_si[] */ 4861 merge->nsend = 0; 4862 for (proc=0; proc<size; proc++) { 4863 len_si[proc] = 0; 4864 if (proc == rank) { 4865 len_s[proc] = 0; 4866 } else { 4867 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4868 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4869 } 4870 if (len_s[proc]) { 4871 merge->nsend++; 4872 nrows = 0; 4873 for (i=owners[proc]; i<owners[proc+1]; i++) { 4874 if (ai[i+1] > ai[i]) nrows++; 4875 } 4876 len_si[proc] = 2*(nrows+1); 4877 len += len_si[proc]; 4878 } 4879 } 4880 4881 /* determine the number and length of messages to receive for ij-structure */ 4882 /*-------------------------------------------------------------------------*/ 4883 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4884 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4885 4886 /* post the Irecv of j-structure */ 4887 /*-------------------------------*/ 4888 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4889 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4890 4891 /* post the Isend of j-structure */ 4892 /*--------------------------------*/ 4893 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4894 4895 for (proc=0, k=0; proc<size; proc++) { 4896 if (!len_s[proc]) continue; 4897 i = owners[proc]; 4898 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4899 k++; 4900 } 4901 4902 /* receives and sends of j-structure are complete */ 4903 /*------------------------------------------------*/ 4904 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4905 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4906 4907 /* send and recv i-structure */ 4908 /*---------------------------*/ 4909 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4910 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4911 4912 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4913 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4914 for (proc=0,k=0; proc<size; proc++) { 4915 if (!len_s[proc]) continue; 4916 /* form outgoing message for i-structure: 4917 buf_si[0]: nrows to be sent 4918 [1:nrows]: row index (global) 4919 [nrows+1:2*nrows+1]: i-structure index 4920 */ 4921 /*-------------------------------------------*/ 4922 nrows = len_si[proc]/2 - 1; 4923 buf_si_i = buf_si + nrows+1; 4924 buf_si[0] = nrows; 4925 buf_si_i[0] = 0; 4926 nrows = 0; 4927 for (i=owners[proc]; i<owners[proc+1]; i++) { 4928 anzi = ai[i+1] - ai[i]; 4929 if (anzi) { 4930 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4931 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4932 nrows++; 4933 } 4934 } 4935 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4936 k++; 4937 buf_si += len_si[proc]; 4938 } 4939 4940 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4941 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4942 4943 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4944 for (i=0; i<merge->nrecv; i++) { 4945 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4946 } 4947 4948 ierr = PetscFree(len_si);CHKERRQ(ierr); 4949 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4950 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4951 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4952 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4953 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4954 ierr = PetscFree(status);CHKERRQ(ierr); 4955 4956 /* compute a local seq matrix in each processor */ 4957 /*----------------------------------------------*/ 4958 /* allocate bi array and free space for accumulating nonzero column info */ 4959 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4960 bi[0] = 0; 4961 4962 /* create and initialize a linked list */ 4963 nlnk = N+1; 4964 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4965 4966 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4967 len = ai[owners[rank+1]] - ai[owners[rank]]; 4968 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4969 4970 current_space = free_space; 4971 4972 /* determine symbolic info for each local row */ 4973 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4974 4975 for (k=0; k<merge->nrecv; k++) { 4976 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4977 nrows = *buf_ri_k[k]; 4978 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4979 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4980 } 4981 4982 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4983 len = 0; 4984 for (i=0; i<m; i++) { 4985 bnzi = 0; 4986 /* add local non-zero cols of this proc's seqmat into lnk */ 4987 arow = owners[rank] + i; 4988 anzi = ai[arow+1] - ai[arow]; 4989 aj = a->j + ai[arow]; 4990 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4991 bnzi += nlnk; 4992 /* add received col data into lnk */ 4993 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4994 if (i == *nextrow[k]) { /* i-th row */ 4995 anzi = *(nextai[k]+1) - *nextai[k]; 4996 aj = buf_rj[k] + *nextai[k]; 4997 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4998 bnzi += nlnk; 4999 nextrow[k]++; nextai[k]++; 5000 } 5001 } 5002 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 5003 5004 /* if free space is not available, make more free space */ 5005 if (current_space->local_remaining<bnzi) { 5006 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 5007 nspacedouble++; 5008 } 5009 /* copy data into free space, then initialize lnk */ 5010 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 5011 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 5012 5013 current_space->array += bnzi; 5014 current_space->local_used += bnzi; 5015 current_space->local_remaining -= bnzi; 5016 5017 bi[i+1] = bi[i] + bnzi; 5018 } 5019 5020 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 5021 5022 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 5023 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 5024 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 5025 5026 /* create symbolic parallel matrix B_mpi */ 5027 /*---------------------------------------*/ 5028 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 5029 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 5030 if (n==PETSC_DECIDE) { 5031 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 5032 } else { 5033 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5034 } 5035 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 5036 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 5037 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 5038 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 5039 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5040 5041 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5042 B_mpi->assembled = PETSC_FALSE; 5043 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 5044 merge->bi = bi; 5045 merge->bj = bj; 5046 merge->buf_ri = buf_ri; 5047 merge->buf_rj = buf_rj; 5048 merge->coi = NULL; 5049 merge->coj = NULL; 5050 merge->owners_co = NULL; 5051 5052 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 5053 5054 /* attach the supporting struct to B_mpi for reuse */ 5055 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 5056 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 5057 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 5058 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 5059 *mpimat = B_mpi; 5060 5061 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 5062 PetscFunctionReturn(0); 5063 } 5064 5065 #undef __FUNCT__ 5066 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 5067 /*@C 5068 MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential 5069 matrices from each processor 5070 5071 Collective on MPI_Comm 5072 5073 Input Parameters: 5074 + comm - the communicators the parallel matrix will live on 5075 . seqmat - the input sequential matrices 5076 . m - number of local rows (or PETSC_DECIDE) 5077 . n - number of local columns (or PETSC_DECIDE) 5078 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5079 5080 Output Parameter: 5081 . mpimat - the parallel matrix generated 5082 5083 Level: advanced 5084 5085 Notes: 5086 The dimensions of the sequential matrix in each processor MUST be the same. 5087 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5088 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 5089 @*/ 5090 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 5091 { 5092 PetscErrorCode ierr; 5093 PetscMPIInt size; 5094 5095 PetscFunctionBegin; 5096 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5097 if (size == 1) { 5098 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5099 if (scall == MAT_INITIAL_MATRIX) { 5100 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 5101 } else { 5102 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5103 } 5104 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5105 PetscFunctionReturn(0); 5106 } 5107 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5108 if (scall == MAT_INITIAL_MATRIX) { 5109 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 5110 } 5111 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 5112 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5113 PetscFunctionReturn(0); 5114 } 5115 5116 #undef __FUNCT__ 5117 #define __FUNCT__ "MatMPIAIJGetLocalMat" 5118 /*@ 5119 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with 5120 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5121 with MatGetSize() 5122 5123 Not Collective 5124 5125 Input Parameters: 5126 + A - the matrix 5127 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5128 5129 Output Parameter: 5130 . A_loc - the local sequential matrix generated 5131 5132 Level: developer 5133 5134 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 5135 5136 @*/ 5137 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5138 { 5139 PetscErrorCode ierr; 5140 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5141 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 5142 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 5143 MatScalar *aa=a->a,*ba=b->a,*cam; 5144 PetscScalar *ca; 5145 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5146 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5147 PetscBool match; 5148 5149 PetscFunctionBegin; 5150 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5151 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5152 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5153 if (scall == MAT_INITIAL_MATRIX) { 5154 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 5155 ci[0] = 0; 5156 for (i=0; i<am; i++) { 5157 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5158 } 5159 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 5160 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 5161 k = 0; 5162 for (i=0; i<am; i++) { 5163 ncols_o = bi[i+1] - bi[i]; 5164 ncols_d = ai[i+1] - ai[i]; 5165 /* off-diagonal portion of A */ 5166 for (jo=0; jo<ncols_o; jo++) { 5167 col = cmap[*bj]; 5168 if (col >= cstart) break; 5169 cj[k] = col; bj++; 5170 ca[k++] = *ba++; 5171 } 5172 /* diagonal portion of A */ 5173 for (j=0; j<ncols_d; j++) { 5174 cj[k] = cstart + *aj++; 5175 ca[k++] = *aa++; 5176 } 5177 /* off-diagonal portion of A */ 5178 for (j=jo; j<ncols_o; j++) { 5179 cj[k] = cmap[*bj++]; 5180 ca[k++] = *ba++; 5181 } 5182 } 5183 /* put together the new matrix */ 5184 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5185 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5186 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5187 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5188 mat->free_a = PETSC_TRUE; 5189 mat->free_ij = PETSC_TRUE; 5190 mat->nonew = 0; 5191 } else if (scall == MAT_REUSE_MATRIX) { 5192 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5193 ci = mat->i; cj = mat->j; cam = mat->a; 5194 for (i=0; i<am; i++) { 5195 /* off-diagonal portion of A */ 5196 ncols_o = bi[i+1] - bi[i]; 5197 for (jo=0; jo<ncols_o; jo++) { 5198 col = cmap[*bj]; 5199 if (col >= cstart) break; 5200 *cam++ = *ba++; bj++; 5201 } 5202 /* diagonal portion of A */ 5203 ncols_d = ai[i+1] - ai[i]; 5204 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5205 /* off-diagonal portion of A */ 5206 for (j=jo; j<ncols_o; j++) { 5207 *cam++ = *ba++; bj++; 5208 } 5209 } 5210 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5211 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5212 PetscFunctionReturn(0); 5213 } 5214 5215 #undef __FUNCT__ 5216 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5217 /*@C 5218 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5219 5220 Not Collective 5221 5222 Input Parameters: 5223 + A - the matrix 5224 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5225 - row, col - index sets of rows and columns to extract (or NULL) 5226 5227 Output Parameter: 5228 . A_loc - the local sequential matrix generated 5229 5230 Level: developer 5231 5232 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5233 5234 @*/ 5235 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5236 { 5237 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5238 PetscErrorCode ierr; 5239 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5240 IS isrowa,iscola; 5241 Mat *aloc; 5242 PetscBool match; 5243 5244 PetscFunctionBegin; 5245 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5246 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5247 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5248 if (!row) { 5249 start = A->rmap->rstart; end = A->rmap->rend; 5250 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5251 } else { 5252 isrowa = *row; 5253 } 5254 if (!col) { 5255 start = A->cmap->rstart; 5256 cmap = a->garray; 5257 nzA = a->A->cmap->n; 5258 nzB = a->B->cmap->n; 5259 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5260 ncols = 0; 5261 for (i=0; i<nzB; i++) { 5262 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5263 else break; 5264 } 5265 imark = i; 5266 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5267 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5268 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5269 } else { 5270 iscola = *col; 5271 } 5272 if (scall != MAT_INITIAL_MATRIX) { 5273 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5274 aloc[0] = *A_loc; 5275 } 5276 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5277 *A_loc = aloc[0]; 5278 ierr = PetscFree(aloc);CHKERRQ(ierr); 5279 if (!row) { 5280 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5281 } 5282 if (!col) { 5283 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5284 } 5285 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5286 PetscFunctionReturn(0); 5287 } 5288 5289 #undef __FUNCT__ 5290 #define __FUNCT__ "MatGetBrowsOfAcols" 5291 /*@C 5292 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5293 5294 Collective on Mat 5295 5296 Input Parameters: 5297 + A,B - the matrices in mpiaij format 5298 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5299 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5300 5301 Output Parameter: 5302 + rowb, colb - index sets of rows and columns of B to extract 5303 - B_seq - the sequential matrix generated 5304 5305 Level: developer 5306 5307 @*/ 5308 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5309 { 5310 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5311 PetscErrorCode ierr; 5312 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5313 IS isrowb,iscolb; 5314 Mat *bseq=NULL; 5315 5316 PetscFunctionBegin; 5317 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5318 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5319 } 5320 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5321 5322 if (scall == MAT_INITIAL_MATRIX) { 5323 start = A->cmap->rstart; 5324 cmap = a->garray; 5325 nzA = a->A->cmap->n; 5326 nzB = a->B->cmap->n; 5327 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5328 ncols = 0; 5329 for (i=0; i<nzB; i++) { /* row < local row index */ 5330 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5331 else break; 5332 } 5333 imark = i; 5334 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5335 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5336 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5337 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5338 } else { 5339 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5340 isrowb = *rowb; iscolb = *colb; 5341 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5342 bseq[0] = *B_seq; 5343 } 5344 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5345 *B_seq = bseq[0]; 5346 ierr = PetscFree(bseq);CHKERRQ(ierr); 5347 if (!rowb) { 5348 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5349 } else { 5350 *rowb = isrowb; 5351 } 5352 if (!colb) { 5353 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5354 } else { 5355 *colb = iscolb; 5356 } 5357 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5358 PetscFunctionReturn(0); 5359 } 5360 5361 #undef __FUNCT__ 5362 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5363 /* 5364 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5365 of the OFF-DIAGONAL portion of local A 5366 5367 Collective on Mat 5368 5369 Input Parameters: 5370 + A,B - the matrices in mpiaij format 5371 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5372 5373 Output Parameter: 5374 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5375 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5376 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5377 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5378 5379 Level: developer 5380 5381 */ 5382 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5383 { 5384 VecScatter_MPI_General *gen_to,*gen_from; 5385 PetscErrorCode ierr; 5386 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5387 Mat_SeqAIJ *b_oth; 5388 VecScatter ctx =a->Mvctx; 5389 MPI_Comm comm; 5390 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5391 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5392 PetscScalar *rvalues,*svalues; 5393 MatScalar *b_otha,*bufa,*bufA; 5394 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5395 MPI_Request *rwaits = NULL,*swaits = NULL; 5396 MPI_Status *sstatus,rstatus; 5397 PetscMPIInt jj; 5398 PetscInt *cols,sbs,rbs; 5399 PetscScalar *vals; 5400 5401 PetscFunctionBegin; 5402 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5403 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5404 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5405 } 5406 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5407 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5408 5409 gen_to = (VecScatter_MPI_General*)ctx->todata; 5410 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5411 rvalues = gen_from->values; /* holds the length of receiving row */ 5412 svalues = gen_to->values; /* holds the length of sending row */ 5413 nrecvs = gen_from->n; 5414 nsends = gen_to->n; 5415 5416 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5417 srow = gen_to->indices; /* local row index to be sent */ 5418 sstarts = gen_to->starts; 5419 sprocs = gen_to->procs; 5420 sstatus = gen_to->sstatus; 5421 sbs = gen_to->bs; 5422 rstarts = gen_from->starts; 5423 rprocs = gen_from->procs; 5424 rbs = gen_from->bs; 5425 5426 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5427 if (scall == MAT_INITIAL_MATRIX) { 5428 /* i-array */ 5429 /*---------*/ 5430 /* post receives */ 5431 for (i=0; i<nrecvs; i++) { 5432 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5433 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5434 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5435 } 5436 5437 /* pack the outgoing message */ 5438 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5439 5440 sstartsj[0] = 0; 5441 rstartsj[0] = 0; 5442 len = 0; /* total length of j or a array to be sent */ 5443 k = 0; 5444 for (i=0; i<nsends; i++) { 5445 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5446 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5447 for (j=0; j<nrows; j++) { 5448 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5449 for (l=0; l<sbs; l++) { 5450 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5451 5452 rowlen[j*sbs+l] = ncols; 5453 5454 len += ncols; 5455 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5456 } 5457 k++; 5458 } 5459 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5460 5461 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5462 } 5463 /* recvs and sends of i-array are completed */ 5464 i = nrecvs; 5465 while (i--) { 5466 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5467 } 5468 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5469 5470 /* allocate buffers for sending j and a arrays */ 5471 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5472 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5473 5474 /* create i-array of B_oth */ 5475 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5476 5477 b_othi[0] = 0; 5478 len = 0; /* total length of j or a array to be received */ 5479 k = 0; 5480 for (i=0; i<nrecvs; i++) { 5481 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5482 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5483 for (j=0; j<nrows; j++) { 5484 b_othi[k+1] = b_othi[k] + rowlen[j]; 5485 len += rowlen[j]; k++; 5486 } 5487 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5488 } 5489 5490 /* allocate space for j and a arrrays of B_oth */ 5491 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5492 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5493 5494 /* j-array */ 5495 /*---------*/ 5496 /* post receives of j-array */ 5497 for (i=0; i<nrecvs; i++) { 5498 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5499 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5500 } 5501 5502 /* pack the outgoing message j-array */ 5503 k = 0; 5504 for (i=0; i<nsends; i++) { 5505 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5506 bufJ = bufj+sstartsj[i]; 5507 for (j=0; j<nrows; j++) { 5508 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5509 for (ll=0; ll<sbs; ll++) { 5510 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5511 for (l=0; l<ncols; l++) { 5512 *bufJ++ = cols[l]; 5513 } 5514 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5515 } 5516 } 5517 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5518 } 5519 5520 /* recvs and sends of j-array are completed */ 5521 i = nrecvs; 5522 while (i--) { 5523 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5524 } 5525 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5526 } else if (scall == MAT_REUSE_MATRIX) { 5527 sstartsj = *startsj_s; 5528 rstartsj = *startsj_r; 5529 bufa = *bufa_ptr; 5530 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5531 b_otha = b_oth->a; 5532 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5533 5534 /* a-array */ 5535 /*---------*/ 5536 /* post receives of a-array */ 5537 for (i=0; i<nrecvs; i++) { 5538 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5539 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5540 } 5541 5542 /* pack the outgoing message a-array */ 5543 k = 0; 5544 for (i=0; i<nsends; i++) { 5545 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5546 bufA = bufa+sstartsj[i]; 5547 for (j=0; j<nrows; j++) { 5548 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5549 for (ll=0; ll<sbs; ll++) { 5550 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5551 for (l=0; l<ncols; l++) { 5552 *bufA++ = vals[l]; 5553 } 5554 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5555 } 5556 } 5557 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5558 } 5559 /* recvs and sends of a-array are completed */ 5560 i = nrecvs; 5561 while (i--) { 5562 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5563 } 5564 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5565 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5566 5567 if (scall == MAT_INITIAL_MATRIX) { 5568 /* put together the new matrix */ 5569 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5570 5571 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5572 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5573 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5574 b_oth->free_a = PETSC_TRUE; 5575 b_oth->free_ij = PETSC_TRUE; 5576 b_oth->nonew = 0; 5577 5578 ierr = PetscFree(bufj);CHKERRQ(ierr); 5579 if (!startsj_s || !bufa_ptr) { 5580 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5581 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5582 } else { 5583 *startsj_s = sstartsj; 5584 *startsj_r = rstartsj; 5585 *bufa_ptr = bufa; 5586 } 5587 } 5588 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5589 PetscFunctionReturn(0); 5590 } 5591 5592 #undef __FUNCT__ 5593 #define __FUNCT__ "MatGetCommunicationStructs" 5594 /*@C 5595 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5596 5597 Not Collective 5598 5599 Input Parameters: 5600 . A - The matrix in mpiaij format 5601 5602 Output Parameter: 5603 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5604 . colmap - A map from global column index to local index into lvec 5605 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5606 5607 Level: developer 5608 5609 @*/ 5610 #if defined(PETSC_USE_CTABLE) 5611 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5612 #else 5613 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5614 #endif 5615 { 5616 Mat_MPIAIJ *a; 5617 5618 PetscFunctionBegin; 5619 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5620 PetscValidPointer(lvec, 2); 5621 PetscValidPointer(colmap, 3); 5622 PetscValidPointer(multScatter, 4); 5623 a = (Mat_MPIAIJ*) A->data; 5624 if (lvec) *lvec = a->lvec; 5625 if (colmap) *colmap = a->colmap; 5626 if (multScatter) *multScatter = a->Mvctx; 5627 PetscFunctionReturn(0); 5628 } 5629 5630 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5631 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5632 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5633 5634 #undef __FUNCT__ 5635 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5636 /* 5637 Computes (B'*A')' since computing B*A directly is untenable 5638 5639 n p p 5640 ( ) ( ) ( ) 5641 m ( A ) * n ( B ) = m ( C ) 5642 ( ) ( ) ( ) 5643 5644 */ 5645 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5646 { 5647 PetscErrorCode ierr; 5648 Mat At,Bt,Ct; 5649 5650 PetscFunctionBegin; 5651 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5652 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5653 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5654 ierr = MatDestroy(&At);CHKERRQ(ierr); 5655 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5656 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5657 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5658 PetscFunctionReturn(0); 5659 } 5660 5661 #undef __FUNCT__ 5662 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5663 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5664 { 5665 PetscErrorCode ierr; 5666 PetscInt m=A->rmap->n,n=B->cmap->n; 5667 Mat Cmat; 5668 5669 PetscFunctionBegin; 5670 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5671 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5672 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5673 ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 5674 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5675 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5676 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5677 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5678 5679 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5680 5681 *C = Cmat; 5682 PetscFunctionReturn(0); 5683 } 5684 5685 /* ----------------------------------------------------------------*/ 5686 #undef __FUNCT__ 5687 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5688 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5689 { 5690 PetscErrorCode ierr; 5691 5692 PetscFunctionBegin; 5693 if (scall == MAT_INITIAL_MATRIX) { 5694 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5695 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5696 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5697 } 5698 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5699 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5700 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5701 PetscFunctionReturn(0); 5702 } 5703 5704 #if defined(PETSC_HAVE_MUMPS) 5705 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5706 #endif 5707 #if defined(PETSC_HAVE_PASTIX) 5708 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5709 #endif 5710 #if defined(PETSC_HAVE_SUPERLU_DIST) 5711 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5712 #endif 5713 #if defined(PETSC_HAVE_CLIQUE) 5714 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5715 #endif 5716 5717 /*MC 5718 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5719 5720 Options Database Keys: 5721 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5722 5723 Level: beginner 5724 5725 .seealso: MatCreateAIJ() 5726 M*/ 5727 5728 #undef __FUNCT__ 5729 #define __FUNCT__ "MatCreate_MPIAIJ" 5730 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5731 { 5732 Mat_MPIAIJ *b; 5733 PetscErrorCode ierr; 5734 PetscMPIInt size; 5735 5736 PetscFunctionBegin; 5737 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5738 5739 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5740 B->data = (void*)b; 5741 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5742 B->assembled = PETSC_FALSE; 5743 B->insertmode = NOT_SET_VALUES; 5744 b->size = size; 5745 5746 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5747 5748 /* build cache for off array entries formed */ 5749 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5750 5751 b->donotstash = PETSC_FALSE; 5752 b->colmap = 0; 5753 b->garray = 0; 5754 b->roworiented = PETSC_TRUE; 5755 5756 /* stuff used for matrix vector multiply */ 5757 b->lvec = NULL; 5758 b->Mvctx = NULL; 5759 5760 /* stuff for MatGetRow() */ 5761 b->rowindices = 0; 5762 b->rowvalues = 0; 5763 b->getrowactive = PETSC_FALSE; 5764 5765 /* flexible pointer used in CUSP/CUSPARSE classes */ 5766 b->spptr = NULL; 5767 5768 #if defined(PETSC_HAVE_MUMPS) 5769 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr); 5770 #endif 5771 #if defined(PETSC_HAVE_PASTIX) 5772 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5773 #endif 5774 #if defined(PETSC_HAVE_SUPERLU_DIST) 5775 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5776 #endif 5777 #if defined(PETSC_HAVE_CLIQUE) 5778 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr); 5779 #endif 5780 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5781 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5782 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5783 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5784 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5785 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5786 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5787 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5788 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5789 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5790 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5791 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5792 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5793 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5794 PetscFunctionReturn(0); 5795 } 5796 5797 #undef __FUNCT__ 5798 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5799 /*@ 5800 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5801 and "off-diagonal" part of the matrix in CSR format. 5802 5803 Collective on MPI_Comm 5804 5805 Input Parameters: 5806 + comm - MPI communicator 5807 . m - number of local rows (Cannot be PETSC_DECIDE) 5808 . n - This value should be the same as the local size used in creating the 5809 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5810 calculated if N is given) For square matrices n is almost always m. 5811 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5812 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5813 . i - row indices for "diagonal" portion of matrix 5814 . j - column indices 5815 . a - matrix values 5816 . oi - row indices for "off-diagonal" portion of matrix 5817 . oj - column indices 5818 - oa - matrix values 5819 5820 Output Parameter: 5821 . mat - the matrix 5822 5823 Level: advanced 5824 5825 Notes: 5826 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5827 must free the arrays once the matrix has been destroyed and not before. 5828 5829 The i and j indices are 0 based 5830 5831 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5832 5833 This sets local rows and cannot be used to set off-processor values. 5834 5835 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5836 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5837 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5838 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5839 keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5840 communication if it is known that only local entries will be set. 5841 5842 .keywords: matrix, aij, compressed row, sparse, parallel 5843 5844 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5845 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5846 @*/ 5847 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5848 { 5849 PetscErrorCode ierr; 5850 Mat_MPIAIJ *maij; 5851 5852 PetscFunctionBegin; 5853 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5854 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5855 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5856 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5857 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5858 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5859 maij = (Mat_MPIAIJ*) (*mat)->data; 5860 5861 (*mat)->preallocated = PETSC_TRUE; 5862 5863 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5864 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5865 5866 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5867 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5868 5869 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5870 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5871 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5872 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5873 5874 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5875 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5876 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5877 PetscFunctionReturn(0); 5878 } 5879 5880 /* 5881 Special version for direct calls from Fortran 5882 */ 5883 #include <petsc-private/fortranimpl.h> 5884 5885 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5886 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5887 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5888 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5889 #endif 5890 5891 /* Change these macros so can be used in void function */ 5892 #undef CHKERRQ 5893 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5894 #undef SETERRQ2 5895 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5896 #undef SETERRQ3 5897 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5898 #undef SETERRQ 5899 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5900 5901 #undef __FUNCT__ 5902 #define __FUNCT__ "matsetvaluesmpiaij_" 5903 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5904 { 5905 Mat mat = *mmat; 5906 PetscInt m = *mm, n = *mn; 5907 InsertMode addv = *maddv; 5908 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5909 PetscScalar value; 5910 PetscErrorCode ierr; 5911 5912 MatCheckPreallocated(mat,1); 5913 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5914 5915 #if defined(PETSC_USE_DEBUG) 5916 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5917 #endif 5918 { 5919 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5920 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5921 PetscBool roworiented = aij->roworiented; 5922 5923 /* Some Variables required in the macro */ 5924 Mat A = aij->A; 5925 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5926 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5927 MatScalar *aa = a->a; 5928 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5929 Mat B = aij->B; 5930 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5931 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5932 MatScalar *ba = b->a; 5933 5934 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5935 PetscInt nonew = a->nonew; 5936 MatScalar *ap1,*ap2; 5937 5938 PetscFunctionBegin; 5939 for (i=0; i<m; i++) { 5940 if (im[i] < 0) continue; 5941 #if defined(PETSC_USE_DEBUG) 5942 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5943 #endif 5944 if (im[i] >= rstart && im[i] < rend) { 5945 row = im[i] - rstart; 5946 lastcol1 = -1; 5947 rp1 = aj + ai[row]; 5948 ap1 = aa + ai[row]; 5949 rmax1 = aimax[row]; 5950 nrow1 = ailen[row]; 5951 low1 = 0; 5952 high1 = nrow1; 5953 lastcol2 = -1; 5954 rp2 = bj + bi[row]; 5955 ap2 = ba + bi[row]; 5956 rmax2 = bimax[row]; 5957 nrow2 = bilen[row]; 5958 low2 = 0; 5959 high2 = nrow2; 5960 5961 for (j=0; j<n; j++) { 5962 if (roworiented) value = v[i*n+j]; 5963 else value = v[i+j*m]; 5964 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5965 if (in[j] >= cstart && in[j] < cend) { 5966 col = in[j] - cstart; 5967 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5968 } else if (in[j] < 0) continue; 5969 #if defined(PETSC_USE_DEBUG) 5970 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5971 #endif 5972 else { 5973 if (mat->was_assembled) { 5974 if (!aij->colmap) { 5975 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5976 } 5977 #if defined(PETSC_USE_CTABLE) 5978 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5979 col--; 5980 #else 5981 col = aij->colmap[in[j]] - 1; 5982 #endif 5983 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5984 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5985 col = in[j]; 5986 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5987 B = aij->B; 5988 b = (Mat_SeqAIJ*)B->data; 5989 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5990 rp2 = bj + bi[row]; 5991 ap2 = ba + bi[row]; 5992 rmax2 = bimax[row]; 5993 nrow2 = bilen[row]; 5994 low2 = 0; 5995 high2 = nrow2; 5996 bm = aij->B->rmap->n; 5997 ba = b->a; 5998 } 5999 } else col = in[j]; 6000 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 6001 } 6002 } 6003 } else if (!aij->donotstash) { 6004 if (roworiented) { 6005 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6006 } else { 6007 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6008 } 6009 } 6010 } 6011 } 6012 PetscFunctionReturnVoid(); 6013 } 6014 6015