1 2 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 3 #include <petsc-private/vecimpl.h> 4 #include <petscblaslapack.h> 5 #include <petscsf.h> 6 7 /*MC 8 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 9 10 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 11 and MATMPIAIJ otherwise. As a result, for single process communicators, 12 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 13 for communicators controlling multiple processes. It is recommended that you call both of 14 the above preallocation routines for simplicity. 15 16 Options Database Keys: 17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 18 19 Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when 20 enough exist. 21 22 Level: beginner 23 24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ 25 M*/ 26 27 /*MC 28 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 29 30 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 31 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 32 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 33 for communicators controlling multiple processes. It is recommended that you call both of 34 the above preallocation routines for simplicity. 35 36 Options Database Keys: 37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 38 39 Level: beginner 40 41 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 42 M*/ 43 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ" 46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 47 { 48 PetscErrorCode ierr; 49 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 50 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 51 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 52 const PetscInt *ia,*ib; 53 const MatScalar *aa,*bb; 54 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 55 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 56 57 PetscFunctionBegin; 58 *keptrows = 0; 59 ia = a->i; 60 ib = b->i; 61 for (i=0; i<m; i++) { 62 na = ia[i+1] - ia[i]; 63 nb = ib[i+1] - ib[i]; 64 if (!na && !nb) { 65 cnt++; 66 goto ok1; 67 } 68 aa = a->a + ia[i]; 69 for (j=0; j<na; j++) { 70 if (aa[j] != 0.0) goto ok1; 71 } 72 bb = b->a + ib[i]; 73 for (j=0; j <nb; j++) { 74 if (bb[j] != 0.0) goto ok1; 75 } 76 cnt++; 77 ok1:; 78 } 79 ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 80 if (!n0rows) PetscFunctionReturn(0); 81 ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 82 cnt = 0; 83 for (i=0; i<m; i++) { 84 na = ia[i+1] - ia[i]; 85 nb = ib[i+1] - ib[i]; 86 if (!na && !nb) continue; 87 aa = a->a + ia[i]; 88 for (j=0; j<na;j++) { 89 if (aa[j] != 0.0) { 90 rows[cnt++] = rstart + i; 91 goto ok2; 92 } 93 } 94 bb = b->a + ib[i]; 95 for (j=0; j<nb; j++) { 96 if (bb[j] != 0.0) { 97 rows[cnt++] = rstart + i; 98 goto ok2; 99 } 100 } 101 ok2:; 102 } 103 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 104 PetscFunctionReturn(0); 105 } 106 107 #undef __FUNCT__ 108 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ" 109 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 110 { 111 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 112 PetscErrorCode ierr; 113 PetscInt i,rstart,nrows,*rows; 114 115 PetscFunctionBegin; 116 *zrows = NULL; 117 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 118 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 119 for (i=0; i<nrows; i++) rows[i] += rstart; 120 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 121 PetscFunctionReturn(0); 122 } 123 124 #undef __FUNCT__ 125 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 126 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 127 { 128 PetscErrorCode ierr; 129 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 130 PetscInt i,n,*garray = aij->garray; 131 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 132 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 133 PetscReal *work; 134 135 PetscFunctionBegin; 136 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 137 ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr); 138 ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr); 139 if (type == NORM_2) { 140 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 141 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 142 } 143 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 144 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 145 } 146 } else if (type == NORM_1) { 147 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 148 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 149 } 150 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 151 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 152 } 153 } else if (type == NORM_INFINITY) { 154 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 155 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 156 } 157 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 158 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 159 } 160 161 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 162 if (type == NORM_INFINITY) { 163 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 164 } else { 165 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 166 } 167 ierr = PetscFree(work);CHKERRQ(ierr); 168 if (type == NORM_2) { 169 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 170 } 171 PetscFunctionReturn(0); 172 } 173 174 #undef __FUNCT__ 175 #define __FUNCT__ "MatDistribute_MPIAIJ" 176 /* 177 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 178 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 179 180 Only for square matrices 181 182 Used by a preconditioner, hence PETSC_EXTERN 183 */ 184 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 185 { 186 PetscMPIInt rank,size; 187 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2]; 188 PetscErrorCode ierr; 189 Mat mat; 190 Mat_SeqAIJ *gmata; 191 PetscMPIInt tag; 192 MPI_Status status; 193 PetscBool aij; 194 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 195 196 PetscFunctionBegin; 197 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 198 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 199 if (!rank) { 200 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 201 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 202 } 203 if (reuse == MAT_INITIAL_MATRIX) { 204 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 205 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 206 if (!rank) { 207 bses[0] = gmat->rmap->bs; 208 bses[1] = gmat->cmap->bs; 209 } 210 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 211 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 212 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 213 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 214 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 215 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 216 217 rowners[0] = 0; 218 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 219 rstart = rowners[rank]; 220 rend = rowners[rank+1]; 221 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 222 if (!rank) { 223 gmata = (Mat_SeqAIJ*) gmat->data; 224 /* send row lengths to all processors */ 225 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 226 for (i=1; i<size; i++) { 227 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 228 } 229 /* determine number diagonal and off-diagonal counts */ 230 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 231 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 232 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 233 jj = 0; 234 for (i=0; i<m; i++) { 235 for (j=0; j<dlens[i]; j++) { 236 if (gmata->j[jj] < rstart) ld[i]++; 237 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 238 jj++; 239 } 240 } 241 /* send column indices to other processes */ 242 for (i=1; i<size; i++) { 243 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 244 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 245 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 246 } 247 248 /* send numerical values to other processes */ 249 for (i=1; i<size; i++) { 250 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 251 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 252 } 253 gmataa = gmata->a; 254 gmataj = gmata->j; 255 256 } else { 257 /* receive row lengths */ 258 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 259 /* receive column indices */ 260 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 261 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 262 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 263 /* determine number diagonal and off-diagonal counts */ 264 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 265 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 266 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 267 jj = 0; 268 for (i=0; i<m; i++) { 269 for (j=0; j<dlens[i]; j++) { 270 if (gmataj[jj] < rstart) ld[i]++; 271 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 272 jj++; 273 } 274 } 275 /* receive numerical values */ 276 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 277 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 278 } 279 /* set preallocation */ 280 for (i=0; i<m; i++) { 281 dlens[i] -= olens[i]; 282 } 283 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 284 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 285 286 for (i=0; i<m; i++) { 287 dlens[i] += olens[i]; 288 } 289 cnt = 0; 290 for (i=0; i<m; i++) { 291 row = rstart + i; 292 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 293 cnt += dlens[i]; 294 } 295 if (rank) { 296 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 297 } 298 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 299 ierr = PetscFree(rowners);CHKERRQ(ierr); 300 301 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 302 303 *inmat = mat; 304 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 305 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 306 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 307 mat = *inmat; 308 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 309 if (!rank) { 310 /* send numerical values to other processes */ 311 gmata = (Mat_SeqAIJ*) gmat->data; 312 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 313 gmataa = gmata->a; 314 for (i=1; i<size; i++) { 315 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 316 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 317 } 318 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 319 } else { 320 /* receive numerical values from process 0*/ 321 nz = Ad->nz + Ao->nz; 322 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 323 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 324 } 325 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 326 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 327 ad = Ad->a; 328 ao = Ao->a; 329 if (mat->rmap->n) { 330 i = 0; 331 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 332 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 333 } 334 for (i=1; i<mat->rmap->n; i++) { 335 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 336 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 337 } 338 i--; 339 if (mat->rmap->n) { 340 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 341 } 342 if (rank) { 343 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 344 } 345 } 346 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 347 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 348 PetscFunctionReturn(0); 349 } 350 351 /* 352 Local utility routine that creates a mapping from the global column 353 number to the local number in the off-diagonal part of the local 354 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 355 a slightly higher hash table cost; without it it is not scalable (each processor 356 has an order N integer array but is fast to acess. 357 */ 358 #undef __FUNCT__ 359 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 360 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 361 { 362 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 363 PetscErrorCode ierr; 364 PetscInt n = aij->B->cmap->n,i; 365 366 PetscFunctionBegin; 367 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 368 #if defined(PETSC_USE_CTABLE) 369 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 370 for (i=0; i<n; i++) { 371 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 372 } 373 #else 374 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 375 ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 376 ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 377 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 378 #endif 379 PetscFunctionReturn(0); 380 } 381 382 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 383 { \ 384 if (col <= lastcol1) low1 = 0; \ 385 else high1 = nrow1; \ 386 lastcol1 = col;\ 387 while (high1-low1 > 5) { \ 388 t = (low1+high1)/2; \ 389 if (rp1[t] > col) high1 = t; \ 390 else low1 = t; \ 391 } \ 392 for (_i=low1; _i<high1; _i++) { \ 393 if (rp1[_i] > col) break; \ 394 if (rp1[_i] == col) { \ 395 if (addv == ADD_VALUES) ap1[_i] += value; \ 396 else ap1[_i] = value; \ 397 goto a_noinsert; \ 398 } \ 399 } \ 400 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 401 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 402 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 403 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 404 N = nrow1++ - 1; a->nz++; high1++; \ 405 /* shift up all the later entries in this row */ \ 406 for (ii=N; ii>=_i; ii--) { \ 407 rp1[ii+1] = rp1[ii]; \ 408 ap1[ii+1] = ap1[ii]; \ 409 } \ 410 rp1[_i] = col; \ 411 ap1[_i] = value; \ 412 a_noinsert: ; \ 413 ailen[row] = nrow1; \ 414 } 415 416 417 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 418 { \ 419 if (col <= lastcol2) low2 = 0; \ 420 else high2 = nrow2; \ 421 lastcol2 = col; \ 422 while (high2-low2 > 5) { \ 423 t = (low2+high2)/2; \ 424 if (rp2[t] > col) high2 = t; \ 425 else low2 = t; \ 426 } \ 427 for (_i=low2; _i<high2; _i++) { \ 428 if (rp2[_i] > col) break; \ 429 if (rp2[_i] == col) { \ 430 if (addv == ADD_VALUES) ap2[_i] += value; \ 431 else ap2[_i] = value; \ 432 goto b_noinsert; \ 433 } \ 434 } \ 435 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 436 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 437 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 438 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 439 N = nrow2++ - 1; b->nz++; high2++; \ 440 /* shift up all the later entries in this row */ \ 441 for (ii=N; ii>=_i; ii--) { \ 442 rp2[ii+1] = rp2[ii]; \ 443 ap2[ii+1] = ap2[ii]; \ 444 } \ 445 rp2[_i] = col; \ 446 ap2[_i] = value; \ 447 b_noinsert: ; \ 448 bilen[row] = nrow2; \ 449 } 450 451 #undef __FUNCT__ 452 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 453 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 454 { 455 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 456 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 457 PetscErrorCode ierr; 458 PetscInt l,*garray = mat->garray,diag; 459 460 PetscFunctionBegin; 461 /* code only works for square matrices A */ 462 463 /* find size of row to the left of the diagonal part */ 464 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 465 row = row - diag; 466 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 467 if (garray[b->j[b->i[row]+l]] > diag) break; 468 } 469 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 470 471 /* diagonal part */ 472 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 473 474 /* right of diagonal part */ 475 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 476 PetscFunctionReturn(0); 477 } 478 479 #undef __FUNCT__ 480 #define __FUNCT__ "MatSetValues_MPIAIJ" 481 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 482 { 483 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 484 PetscScalar value; 485 PetscErrorCode ierr; 486 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 487 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 488 PetscBool roworiented = aij->roworiented; 489 490 /* Some Variables required in the macro */ 491 Mat A = aij->A; 492 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 493 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 494 MatScalar *aa = a->a; 495 PetscBool ignorezeroentries = a->ignorezeroentries; 496 Mat B = aij->B; 497 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 498 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 499 MatScalar *ba = b->a; 500 501 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 502 PetscInt nonew; 503 MatScalar *ap1,*ap2; 504 505 PetscFunctionBegin; 506 if (v) PetscValidScalarPointer(v,6); 507 for (i=0; i<m; i++) { 508 if (im[i] < 0) continue; 509 #if defined(PETSC_USE_DEBUG) 510 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 511 #endif 512 if (im[i] >= rstart && im[i] < rend) { 513 row = im[i] - rstart; 514 lastcol1 = -1; 515 rp1 = aj + ai[row]; 516 ap1 = aa + ai[row]; 517 rmax1 = aimax[row]; 518 nrow1 = ailen[row]; 519 low1 = 0; 520 high1 = nrow1; 521 lastcol2 = -1; 522 rp2 = bj + bi[row]; 523 ap2 = ba + bi[row]; 524 rmax2 = bimax[row]; 525 nrow2 = bilen[row]; 526 low2 = 0; 527 high2 = nrow2; 528 529 for (j=0; j<n; j++) { 530 if (v) { 531 if (roworiented) value = v[i*n+j]; 532 else value = v[i+j*m]; 533 } else value = 0.0; 534 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 535 if (in[j] >= cstart && in[j] < cend) { 536 col = in[j] - cstart; 537 nonew = a->nonew; 538 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 539 } else if (in[j] < 0) continue; 540 #if defined(PETSC_USE_DEBUG) 541 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 542 #endif 543 else { 544 if (mat->was_assembled) { 545 if (!aij->colmap) { 546 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 547 } 548 #if defined(PETSC_USE_CTABLE) 549 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 550 col--; 551 #else 552 col = aij->colmap[in[j]] - 1; 553 #endif 554 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 555 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 556 col = in[j]; 557 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 558 B = aij->B; 559 b = (Mat_SeqAIJ*)B->data; 560 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 561 rp2 = bj + bi[row]; 562 ap2 = ba + bi[row]; 563 rmax2 = bimax[row]; 564 nrow2 = bilen[row]; 565 low2 = 0; 566 high2 = nrow2; 567 bm = aij->B->rmap->n; 568 ba = b->a; 569 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 570 } else col = in[j]; 571 nonew = b->nonew; 572 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 573 } 574 } 575 } else { 576 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 577 if (!aij->donotstash) { 578 mat->assembled = PETSC_FALSE; 579 if (roworiented) { 580 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 581 } else { 582 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 583 } 584 } 585 } 586 } 587 PetscFunctionReturn(0); 588 } 589 590 #undef __FUNCT__ 591 #define __FUNCT__ "MatGetValues_MPIAIJ" 592 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 593 { 594 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 595 PetscErrorCode ierr; 596 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 597 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 598 599 PetscFunctionBegin; 600 for (i=0; i<m; i++) { 601 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 602 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 603 if (idxm[i] >= rstart && idxm[i] < rend) { 604 row = idxm[i] - rstart; 605 for (j=0; j<n; j++) { 606 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 607 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 608 if (idxn[j] >= cstart && idxn[j] < cend) { 609 col = idxn[j] - cstart; 610 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 611 } else { 612 if (!aij->colmap) { 613 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 614 } 615 #if defined(PETSC_USE_CTABLE) 616 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 617 col--; 618 #else 619 col = aij->colmap[idxn[j]] - 1; 620 #endif 621 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 622 else { 623 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 624 } 625 } 626 } 627 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 628 } 629 PetscFunctionReturn(0); 630 } 631 632 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 633 634 #undef __FUNCT__ 635 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 636 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 637 { 638 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 639 PetscErrorCode ierr; 640 PetscInt nstash,reallocs; 641 InsertMode addv; 642 643 PetscFunctionBegin; 644 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 645 646 /* make sure all processors are either in INSERTMODE or ADDMODE */ 647 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 648 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 649 mat->insertmode = addv; /* in case this processor had no cache */ 650 651 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 652 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 653 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 654 PetscFunctionReturn(0); 655 } 656 657 #undef __FUNCT__ 658 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 659 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 660 { 661 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 662 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 663 PetscErrorCode ierr; 664 PetscMPIInt n; 665 PetscInt i,j,rstart,ncols,flg; 666 PetscInt *row,*col; 667 PetscBool other_disassembled; 668 PetscScalar *val; 669 InsertMode addv = mat->insertmode; 670 671 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 672 673 PetscFunctionBegin; 674 if (!aij->donotstash && !mat->nooffprocentries) { 675 while (1) { 676 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 677 if (!flg) break; 678 679 for (i=0; i<n; ) { 680 /* Now identify the consecutive vals belonging to the same row */ 681 for (j=i,rstart=row[j]; j<n; j++) { 682 if (row[j] != rstart) break; 683 } 684 if (j < n) ncols = j-i; 685 else ncols = n-i; 686 /* Now assemble all these values with a single function call */ 687 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 688 689 i = j; 690 } 691 } 692 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 693 } 694 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 695 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 696 697 /* determine if any processor has disassembled, if so we must 698 also disassemble ourselfs, in order that we may reassemble. */ 699 /* 700 if nonzero structure of submatrix B cannot change then we know that 701 no processor disassembled thus we can skip this stuff 702 */ 703 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 704 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 705 if (mat->was_assembled && !other_disassembled) { 706 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 707 } 708 } 709 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 710 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 711 } 712 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 713 ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 714 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 715 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 716 717 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 718 719 aij->rowvalues = 0; 720 721 /* used by MatAXPY() */ 722 a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 723 a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 724 725 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 726 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 727 PetscFunctionReturn(0); 728 } 729 730 #undef __FUNCT__ 731 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 732 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 733 { 734 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 735 PetscErrorCode ierr; 736 737 PetscFunctionBegin; 738 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 739 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 740 PetscFunctionReturn(0); 741 } 742 743 #undef __FUNCT__ 744 #define __FUNCT__ "MatZeroRows_MPIAIJ" 745 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 746 { 747 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 748 PetscErrorCode ierr; 749 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 750 PetscInt i,*owners = A->rmap->range; 751 PetscInt *nprocs,j,idx,nsends,row; 752 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 753 PetscInt *rvalues,count,base,slen,*source; 754 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 755 MPI_Comm comm; 756 MPI_Request *send_waits,*recv_waits; 757 MPI_Status recv_status,*send_status; 758 const PetscScalar *xx; 759 PetscScalar *bb; 760 #if defined(PETSC_DEBUG) 761 PetscBool found = PETSC_FALSE; 762 #endif 763 764 PetscFunctionBegin; 765 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 766 /* first count number of contributors to each processor */ 767 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 768 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 769 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 770 j = 0; 771 for (i=0; i<N; i++) { 772 if (lastidx > (idx = rows[i])) j = 0; 773 lastidx = idx; 774 for (; j<size; j++) { 775 if (idx >= owners[j] && idx < owners[j+1]) { 776 nprocs[2*j]++; 777 nprocs[2*j+1] = 1; 778 owner[i] = j; 779 #if defined(PETSC_DEBUG) 780 found = PETSC_TRUE; 781 #endif 782 break; 783 } 784 } 785 #if defined(PETSC_DEBUG) 786 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 787 found = PETSC_FALSE; 788 #endif 789 } 790 nsends = 0; 791 for (i=0; i<size; i++) nsends += nprocs[2*i+1]; 792 793 if (A->nooffproczerorows) { 794 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 795 nrecvs = nsends; 796 nmax = N; 797 } else { 798 /* inform other processors of number of messages and max length*/ 799 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 800 } 801 802 /* post receives: */ 803 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 804 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 805 for (i=0; i<nrecvs; i++) { 806 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 807 } 808 809 /* do sends: 810 1) starts[i] gives the starting index in svalues for stuff going to 811 the ith processor 812 */ 813 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 814 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 815 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 816 817 starts[0] = 0; 818 for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 819 for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i]; 820 821 starts[0] = 0; 822 for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 823 count = 0; 824 for (i=0; i<size; i++) { 825 if (nprocs[2*i+1]) { 826 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 827 } 828 } 829 ierr = PetscFree(starts);CHKERRQ(ierr); 830 831 base = owners[rank]; 832 833 /* wait on receives */ 834 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 835 count = nrecvs; slen = 0; 836 while (count) { 837 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 838 /* unpack receives into our local space */ 839 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 840 841 source[imdex] = recv_status.MPI_SOURCE; 842 lens[imdex] = n; 843 slen += n; 844 count--; 845 } 846 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 847 848 /* move the data into the send scatter */ 849 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 850 count = 0; 851 for (i=0; i<nrecvs; i++) { 852 values = rvalues + i*nmax; 853 for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base; 854 } 855 ierr = PetscFree(rvalues);CHKERRQ(ierr); 856 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 857 ierr = PetscFree(owner);CHKERRQ(ierr); 858 ierr = PetscFree(nprocs);CHKERRQ(ierr); 859 860 /* fix right hand side if needed */ 861 if (x && b) { 862 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 863 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 864 for (i=0; i<slen; i++) bb[lrows[i]] = diag*xx[lrows[i]]; 865 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 866 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 867 } 868 /* 869 Zero the required rows. If the "diagonal block" of the matrix 870 is square and the user wishes to set the diagonal we use separate 871 code so that MatSetValues() is not called for each diagonal allocating 872 new memory, thus calling lots of mallocs and slowing things down. 873 874 */ 875 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 876 ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 877 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 878 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 879 } else if (diag != 0.0) { 880 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 881 if (((Mat_SeqAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 882 for (i = 0; i < slen; i++) { 883 row = lrows[i] + rstart; 884 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 885 } 886 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 887 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 888 } else { 889 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 890 } 891 ierr = PetscFree(lrows);CHKERRQ(ierr); 892 893 /* wait on sends */ 894 if (nsends) { 895 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 896 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 897 ierr = PetscFree(send_status);CHKERRQ(ierr); 898 } 899 ierr = PetscFree(send_waits);CHKERRQ(ierr); 900 ierr = PetscFree(svalues);CHKERRQ(ierr); 901 PetscFunctionReturn(0); 902 } 903 904 #undef __FUNCT__ 905 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 906 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 907 { 908 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 909 PetscErrorCode ierr; 910 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 911 PetscInt i,*owners = A->rmap->range; 912 PetscInt *nprocs,j,idx,nsends; 913 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 914 PetscInt *rvalues,count,base,slen,*source; 915 PetscInt *lens,*lrows,*values,m; 916 MPI_Comm comm; 917 MPI_Request *send_waits,*recv_waits; 918 MPI_Status recv_status,*send_status; 919 const PetscScalar *xx; 920 PetscScalar *bb,*mask; 921 Vec xmask,lmask; 922 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 923 const PetscInt *aj, *ii,*ridx; 924 PetscScalar *aa; 925 #if defined(PETSC_DEBUG) 926 PetscBool found = PETSC_FALSE; 927 #endif 928 929 PetscFunctionBegin; 930 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 931 /* first count number of contributors to each processor */ 932 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 933 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 934 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 935 j = 0; 936 for (i=0; i<N; i++) { 937 if (lastidx > (idx = rows[i])) j = 0; 938 lastidx = idx; 939 for (; j<size; j++) { 940 if (idx >= owners[j] && idx < owners[j+1]) { 941 nprocs[2*j]++; 942 nprocs[2*j+1] = 1; 943 owner[i] = j; 944 #if defined(PETSC_DEBUG) 945 found = PETSC_TRUE; 946 #endif 947 break; 948 } 949 } 950 #if defined(PETSC_DEBUG) 951 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 952 found = PETSC_FALSE; 953 #endif 954 } 955 nsends = 0; for (i=0; i<size; i++) nsends += nprocs[2*i+1]; 956 957 /* inform other processors of number of messages and max length*/ 958 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 959 960 /* post receives: */ 961 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 962 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 963 for (i=0; i<nrecvs; i++) { 964 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 965 } 966 967 /* do sends: 968 1) starts[i] gives the starting index in svalues for stuff going to 969 the ith processor 970 */ 971 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 972 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 973 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 974 975 starts[0] = 0; 976 for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 977 for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i]; 978 979 starts[0] = 0; 980 for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 981 count = 0; 982 for (i=0; i<size; i++) { 983 if (nprocs[2*i+1]) { 984 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 985 } 986 } 987 ierr = PetscFree(starts);CHKERRQ(ierr); 988 989 base = owners[rank]; 990 991 /* wait on receives */ 992 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 993 count = nrecvs; slen = 0; 994 while (count) { 995 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 996 /* unpack receives into our local space */ 997 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 998 999 source[imdex] = recv_status.MPI_SOURCE; 1000 lens[imdex] = n; 1001 slen += n; 1002 count--; 1003 } 1004 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1005 1006 /* move the data into the send scatter */ 1007 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1008 count = 0; 1009 for (i=0; i<nrecvs; i++) { 1010 values = rvalues + i*nmax; 1011 for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base; 1012 } 1013 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1014 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1015 ierr = PetscFree(owner);CHKERRQ(ierr); 1016 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1017 /* lrows are the local rows to be zeroed, slen is the number of local rows */ 1018 1019 /* zero diagonal part of matrix */ 1020 ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr); 1021 1022 /* handle off diagonal part of matrix */ 1023 ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr); 1024 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1025 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1026 for (i=0; i<slen; i++) bb[lrows[i]] = 1; 1027 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1028 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1029 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1030 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1031 if (x) { 1032 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1033 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1034 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1035 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1036 } 1037 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1038 1039 /* remove zeroed rows of off diagonal matrix */ 1040 ii = aij->i; 1041 for (i=0; i<slen; i++) { 1042 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 1043 } 1044 1045 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1046 if (aij->compressedrow.use) { 1047 m = aij->compressedrow.nrows; 1048 ii = aij->compressedrow.i; 1049 ridx = aij->compressedrow.rindex; 1050 for (i=0; i<m; i++) { 1051 n = ii[i+1] - ii[i]; 1052 aj = aij->j + ii[i]; 1053 aa = aij->a + ii[i]; 1054 1055 for (j=0; j<n; j++) { 1056 if (PetscAbsScalar(mask[*aj])) { 1057 if (b) bb[*ridx] -= *aa*xx[*aj]; 1058 *aa = 0.0; 1059 } 1060 aa++; 1061 aj++; 1062 } 1063 ridx++; 1064 } 1065 } else { /* do not use compressed row format */ 1066 m = l->B->rmap->n; 1067 for (i=0; i<m; i++) { 1068 n = ii[i+1] - ii[i]; 1069 aj = aij->j + ii[i]; 1070 aa = aij->a + ii[i]; 1071 for (j=0; j<n; j++) { 1072 if (PetscAbsScalar(mask[*aj])) { 1073 if (b) bb[i] -= *aa*xx[*aj]; 1074 *aa = 0.0; 1075 } 1076 aa++; 1077 aj++; 1078 } 1079 } 1080 } 1081 if (x) { 1082 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1083 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1084 } 1085 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1086 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1087 ierr = PetscFree(lrows);CHKERRQ(ierr); 1088 1089 /* wait on sends */ 1090 if (nsends) { 1091 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1092 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1093 ierr = PetscFree(send_status);CHKERRQ(ierr); 1094 } 1095 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1096 ierr = PetscFree(svalues);CHKERRQ(ierr); 1097 PetscFunctionReturn(0); 1098 } 1099 1100 #undef __FUNCT__ 1101 #define __FUNCT__ "MatMult_MPIAIJ" 1102 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1103 { 1104 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1105 PetscErrorCode ierr; 1106 PetscInt nt; 1107 1108 PetscFunctionBegin; 1109 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1110 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1111 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1112 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1113 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1114 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1115 PetscFunctionReturn(0); 1116 } 1117 1118 #undef __FUNCT__ 1119 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 1120 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1121 { 1122 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1123 PetscErrorCode ierr; 1124 1125 PetscFunctionBegin; 1126 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1127 PetscFunctionReturn(0); 1128 } 1129 1130 #undef __FUNCT__ 1131 #define __FUNCT__ "MatMultAdd_MPIAIJ" 1132 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1133 { 1134 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1135 PetscErrorCode ierr; 1136 1137 PetscFunctionBegin; 1138 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1139 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1140 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1141 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1142 PetscFunctionReturn(0); 1143 } 1144 1145 #undef __FUNCT__ 1146 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1147 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1148 { 1149 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1150 PetscErrorCode ierr; 1151 PetscBool merged; 1152 1153 PetscFunctionBegin; 1154 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1155 /* do nondiagonal part */ 1156 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1157 if (!merged) { 1158 /* send it on its way */ 1159 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1160 /* do local part */ 1161 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1162 /* receive remote parts: note this assumes the values are not actually */ 1163 /* added in yy until the next line, */ 1164 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1165 } else { 1166 /* do local part */ 1167 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1168 /* send it on its way */ 1169 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1170 /* values actually were received in the Begin() but we need to call this nop */ 1171 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1172 } 1173 PetscFunctionReturn(0); 1174 } 1175 1176 #undef __FUNCT__ 1177 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1178 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1179 { 1180 MPI_Comm comm; 1181 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1182 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1183 IS Me,Notme; 1184 PetscErrorCode ierr; 1185 PetscInt M,N,first,last,*notme,i; 1186 PetscMPIInt size; 1187 1188 PetscFunctionBegin; 1189 /* Easy test: symmetric diagonal block */ 1190 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1191 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1192 if (!*f) PetscFunctionReturn(0); 1193 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1194 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1195 if (size == 1) PetscFunctionReturn(0); 1196 1197 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 1198 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1199 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1200 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 1201 for (i=0; i<first; i++) notme[i] = i; 1202 for (i=last; i<M; i++) notme[i-last+first] = i; 1203 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1204 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1205 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1206 Aoff = Aoffs[0]; 1207 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1208 Boff = Boffs[0]; 1209 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1210 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1211 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1212 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1213 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1214 ierr = PetscFree(notme);CHKERRQ(ierr); 1215 PetscFunctionReturn(0); 1216 } 1217 1218 #undef __FUNCT__ 1219 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1220 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1221 { 1222 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1223 PetscErrorCode ierr; 1224 1225 PetscFunctionBegin; 1226 /* do nondiagonal part */ 1227 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1228 /* send it on its way */ 1229 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1230 /* do local part */ 1231 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1232 /* receive remote parts */ 1233 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1234 PetscFunctionReturn(0); 1235 } 1236 1237 /* 1238 This only works correctly for square matrices where the subblock A->A is the 1239 diagonal block 1240 */ 1241 #undef __FUNCT__ 1242 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1243 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1244 { 1245 PetscErrorCode ierr; 1246 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1247 1248 PetscFunctionBegin; 1249 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1250 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1251 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1252 PetscFunctionReturn(0); 1253 } 1254 1255 #undef __FUNCT__ 1256 #define __FUNCT__ "MatScale_MPIAIJ" 1257 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1258 { 1259 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1260 PetscErrorCode ierr; 1261 1262 PetscFunctionBegin; 1263 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1264 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1265 PetscFunctionReturn(0); 1266 } 1267 1268 #undef __FUNCT__ 1269 #define __FUNCT__ "MatDestroy_MPIAIJ" 1270 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1271 { 1272 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1273 PetscErrorCode ierr; 1274 1275 PetscFunctionBegin; 1276 #if defined(PETSC_USE_LOG) 1277 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1278 #endif 1279 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1280 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1281 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1282 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1283 #if defined(PETSC_USE_CTABLE) 1284 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1285 #else 1286 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1287 #endif 1288 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1289 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1290 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1291 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1292 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1293 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1294 1295 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1296 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1297 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1298 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 1299 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1300 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1301 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1302 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1303 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1304 PetscFunctionReturn(0); 1305 } 1306 1307 #undef __FUNCT__ 1308 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1309 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1310 { 1311 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1312 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1313 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1314 PetscErrorCode ierr; 1315 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1316 int fd; 1317 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1318 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 1319 PetscScalar *column_values; 1320 PetscInt message_count,flowcontrolcount; 1321 FILE *file; 1322 1323 PetscFunctionBegin; 1324 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1325 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1326 nz = A->nz + B->nz; 1327 if (!rank) { 1328 header[0] = MAT_FILE_CLASSID; 1329 header[1] = mat->rmap->N; 1330 header[2] = mat->cmap->N; 1331 1332 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1333 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1334 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1335 /* get largest number of rows any processor has */ 1336 rlen = mat->rmap->n; 1337 range = mat->rmap->range; 1338 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1339 } else { 1340 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1341 rlen = mat->rmap->n; 1342 } 1343 1344 /* load up the local row counts */ 1345 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 1346 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1347 1348 /* store the row lengths to the file */ 1349 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1350 if (!rank) { 1351 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1352 for (i=1; i<size; i++) { 1353 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1354 rlen = range[i+1] - range[i]; 1355 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1356 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1357 } 1358 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1359 } else { 1360 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1361 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1362 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1363 } 1364 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1365 1366 /* load up the local column indices */ 1367 nzmax = nz; /* th processor needs space a largest processor needs */ 1368 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1369 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1370 cnt = 0; 1371 for (i=0; i<mat->rmap->n; i++) { 1372 for (j=B->i[i]; j<B->i[i+1]; j++) { 1373 if ((col = garray[B->j[j]]) > cstart) break; 1374 column_indices[cnt++] = col; 1375 } 1376 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1377 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1378 } 1379 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1380 1381 /* store the column indices to the file */ 1382 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1383 if (!rank) { 1384 MPI_Status status; 1385 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1386 for (i=1; i<size; i++) { 1387 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1388 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1389 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1390 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1391 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1392 } 1393 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1394 } else { 1395 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1396 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1397 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1398 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1399 } 1400 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1401 1402 /* load up the local column values */ 1403 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1404 cnt = 0; 1405 for (i=0; i<mat->rmap->n; i++) { 1406 for (j=B->i[i]; j<B->i[i+1]; j++) { 1407 if (garray[B->j[j]] > cstart) break; 1408 column_values[cnt++] = B->a[j]; 1409 } 1410 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1411 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1412 } 1413 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1414 1415 /* store the column values to the file */ 1416 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1417 if (!rank) { 1418 MPI_Status status; 1419 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1420 for (i=1; i<size; i++) { 1421 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1422 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1423 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1424 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1425 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1426 } 1427 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1428 } else { 1429 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1430 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1431 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1432 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1433 } 1434 ierr = PetscFree(column_values);CHKERRQ(ierr); 1435 1436 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1437 if (file) fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1438 PetscFunctionReturn(0); 1439 } 1440 1441 #include <petscdraw.h> 1442 #undef __FUNCT__ 1443 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1444 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1445 { 1446 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1447 PetscErrorCode ierr; 1448 PetscMPIInt rank = aij->rank,size = aij->size; 1449 PetscBool isdraw,iascii,isbinary; 1450 PetscViewer sviewer; 1451 PetscViewerFormat format; 1452 1453 PetscFunctionBegin; 1454 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1455 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1456 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1457 if (iascii) { 1458 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1459 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1460 MatInfo info; 1461 PetscBool inodes; 1462 1463 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1464 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1465 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1466 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1467 if (!inodes) { 1468 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1469 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1470 } else { 1471 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1472 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1473 } 1474 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1475 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1476 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1477 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1478 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1479 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1480 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1481 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1482 PetscFunctionReturn(0); 1483 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1484 PetscInt inodecount,inodelimit,*inodes; 1485 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1486 if (inodes) { 1487 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1488 } else { 1489 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1490 } 1491 PetscFunctionReturn(0); 1492 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1493 PetscFunctionReturn(0); 1494 } 1495 } else if (isbinary) { 1496 if (size == 1) { 1497 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1498 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1499 } else { 1500 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1501 } 1502 PetscFunctionReturn(0); 1503 } else if (isdraw) { 1504 PetscDraw draw; 1505 PetscBool isnull; 1506 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1507 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1508 } 1509 1510 if (size == 1) { 1511 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1512 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1513 } else { 1514 /* assemble the entire matrix onto first processor. */ 1515 Mat A; 1516 Mat_SeqAIJ *Aloc; 1517 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1518 MatScalar *a; 1519 1520 if (mat->rmap->N > 1024) { 1521 PetscBool flg = PETSC_FALSE; 1522 1523 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr); 1524 if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1525 } 1526 1527 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1528 if (!rank) { 1529 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1530 } else { 1531 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1532 } 1533 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1534 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1535 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1536 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1537 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1538 1539 /* copy over the A part */ 1540 Aloc = (Mat_SeqAIJ*)aij->A->data; 1541 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1542 row = mat->rmap->rstart; 1543 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1544 for (i=0; i<m; i++) { 1545 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1546 row++; 1547 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1548 } 1549 aj = Aloc->j; 1550 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1551 1552 /* copy over the B part */ 1553 Aloc = (Mat_SeqAIJ*)aij->B->data; 1554 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1555 row = mat->rmap->rstart; 1556 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1557 ct = cols; 1558 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1559 for (i=0; i<m; i++) { 1560 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1561 row++; 1562 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1563 } 1564 ierr = PetscFree(ct);CHKERRQ(ierr); 1565 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1566 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1567 /* 1568 Everyone has to call to draw the matrix since the graphics waits are 1569 synchronized across all processors that share the PetscDraw object 1570 */ 1571 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1572 if (!rank) { 1573 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1574 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1575 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1576 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1577 } 1578 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1579 ierr = MatDestroy(&A);CHKERRQ(ierr); 1580 } 1581 PetscFunctionReturn(0); 1582 } 1583 1584 #undef __FUNCT__ 1585 #define __FUNCT__ "MatView_MPIAIJ" 1586 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1587 { 1588 PetscErrorCode ierr; 1589 PetscBool iascii,isdraw,issocket,isbinary; 1590 1591 PetscFunctionBegin; 1592 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1593 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1594 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1595 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1596 if (iascii || isdraw || isbinary || issocket) { 1597 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1598 } 1599 PetscFunctionReturn(0); 1600 } 1601 1602 #undef __FUNCT__ 1603 #define __FUNCT__ "MatSOR_MPIAIJ" 1604 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1605 { 1606 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1607 PetscErrorCode ierr; 1608 Vec bb1 = 0; 1609 PetscBool hasop; 1610 1611 PetscFunctionBegin; 1612 if (flag == SOR_APPLY_UPPER) { 1613 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1614 PetscFunctionReturn(0); 1615 } 1616 1617 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1618 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1619 } 1620 1621 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1622 if (flag & SOR_ZERO_INITIAL_GUESS) { 1623 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1624 its--; 1625 } 1626 1627 while (its--) { 1628 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1629 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1630 1631 /* update rhs: bb1 = bb - B*x */ 1632 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1633 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1634 1635 /* local sweep */ 1636 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1637 } 1638 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1639 if (flag & SOR_ZERO_INITIAL_GUESS) { 1640 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1641 its--; 1642 } 1643 while (its--) { 1644 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1645 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1646 1647 /* update rhs: bb1 = bb - B*x */ 1648 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1649 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1650 1651 /* local sweep */ 1652 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1653 } 1654 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1655 if (flag & SOR_ZERO_INITIAL_GUESS) { 1656 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1657 its--; 1658 } 1659 while (its--) { 1660 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1661 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1662 1663 /* update rhs: bb1 = bb - B*x */ 1664 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1665 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1666 1667 /* local sweep */ 1668 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1669 } 1670 } else if (flag & SOR_EISENSTAT) { 1671 Vec xx1; 1672 1673 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1674 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1675 1676 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1677 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1678 if (!mat->diag) { 1679 ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1680 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1681 } 1682 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1683 if (hasop) { 1684 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1685 } else { 1686 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1687 } 1688 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1689 1690 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1691 1692 /* local sweep */ 1693 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1694 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1695 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1696 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1697 1698 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1699 PetscFunctionReturn(0); 1700 } 1701 1702 #undef __FUNCT__ 1703 #define __FUNCT__ "MatPermute_MPIAIJ" 1704 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1705 { 1706 Mat aA,aB,Aperm; 1707 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1708 PetscScalar *aa,*ba; 1709 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1710 PetscSF rowsf,sf; 1711 IS parcolp = NULL; 1712 PetscBool done; 1713 PetscErrorCode ierr; 1714 1715 PetscFunctionBegin; 1716 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1717 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1718 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1719 ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr); 1720 1721 /* Invert row permutation to find out where my rows should go */ 1722 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1723 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1724 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1725 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1726 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1727 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1728 1729 /* Invert column permutation to find out where my columns should go */ 1730 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1731 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1732 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1733 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1734 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1735 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1736 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1737 1738 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1739 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1740 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1741 1742 /* Find out where my gcols should go */ 1743 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1744 ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr); 1745 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1746 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1747 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1748 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1749 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1750 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1751 1752 ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr); 1753 ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1754 ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1755 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1756 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1757 for (i=0; i<m; i++) { 1758 PetscInt row = rdest[i],rowner; 1759 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1760 for (j=ai[i]; j<ai[i+1]; j++) { 1761 PetscInt cowner,col = cdest[aj[j]]; 1762 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1763 if (rowner == cowner) dnnz[i]++; 1764 else onnz[i]++; 1765 } 1766 for (j=bi[i]; j<bi[i+1]; j++) { 1767 PetscInt cowner,col = gcdest[bj[j]]; 1768 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1769 if (rowner == cowner) dnnz[i]++; 1770 else onnz[i]++; 1771 } 1772 } 1773 ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1774 ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1775 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1776 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1777 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1778 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1779 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1780 1781 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1782 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1783 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1784 for (i=0; i<m; i++) { 1785 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1786 PetscInt rowlen; 1787 rowlen = ai[i+1] - ai[i]; 1788 for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]]; 1789 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr); 1790 rowlen = bi[i+1] - bi[i]; 1791 for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]]; 1792 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr); 1793 } 1794 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1795 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1796 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1797 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1798 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1799 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1800 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1801 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1802 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1803 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1804 *B = Aperm; 1805 PetscFunctionReturn(0); 1806 } 1807 1808 #undef __FUNCT__ 1809 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1810 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1811 { 1812 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1813 Mat A = mat->A,B = mat->B; 1814 PetscErrorCode ierr; 1815 PetscReal isend[5],irecv[5]; 1816 1817 PetscFunctionBegin; 1818 info->block_size = 1.0; 1819 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1820 1821 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1822 isend[3] = info->memory; isend[4] = info->mallocs; 1823 1824 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1825 1826 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1827 isend[3] += info->memory; isend[4] += info->mallocs; 1828 if (flag == MAT_LOCAL) { 1829 info->nz_used = isend[0]; 1830 info->nz_allocated = isend[1]; 1831 info->nz_unneeded = isend[2]; 1832 info->memory = isend[3]; 1833 info->mallocs = isend[4]; 1834 } else if (flag == MAT_GLOBAL_MAX) { 1835 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1836 1837 info->nz_used = irecv[0]; 1838 info->nz_allocated = irecv[1]; 1839 info->nz_unneeded = irecv[2]; 1840 info->memory = irecv[3]; 1841 info->mallocs = irecv[4]; 1842 } else if (flag == MAT_GLOBAL_SUM) { 1843 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1844 1845 info->nz_used = irecv[0]; 1846 info->nz_allocated = irecv[1]; 1847 info->nz_unneeded = irecv[2]; 1848 info->memory = irecv[3]; 1849 info->mallocs = irecv[4]; 1850 } 1851 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1852 info->fill_ratio_needed = 0; 1853 info->factor_mallocs = 0; 1854 PetscFunctionReturn(0); 1855 } 1856 1857 #undef __FUNCT__ 1858 #define __FUNCT__ "MatSetOption_MPIAIJ" 1859 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1860 { 1861 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1862 PetscErrorCode ierr; 1863 1864 PetscFunctionBegin; 1865 switch (op) { 1866 case MAT_NEW_NONZERO_LOCATIONS: 1867 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1868 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1869 case MAT_KEEP_NONZERO_PATTERN: 1870 case MAT_NEW_NONZERO_LOCATION_ERR: 1871 case MAT_USE_INODES: 1872 case MAT_IGNORE_ZERO_ENTRIES: 1873 MatCheckPreallocated(A,1); 1874 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1875 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1876 break; 1877 case MAT_ROW_ORIENTED: 1878 a->roworiented = flg; 1879 1880 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1881 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1882 break; 1883 case MAT_NEW_DIAGONALS: 1884 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1885 break; 1886 case MAT_IGNORE_OFF_PROC_ENTRIES: 1887 a->donotstash = flg; 1888 break; 1889 case MAT_SPD: 1890 A->spd_set = PETSC_TRUE; 1891 A->spd = flg; 1892 if (flg) { 1893 A->symmetric = PETSC_TRUE; 1894 A->structurally_symmetric = PETSC_TRUE; 1895 A->symmetric_set = PETSC_TRUE; 1896 A->structurally_symmetric_set = PETSC_TRUE; 1897 } 1898 break; 1899 case MAT_SYMMETRIC: 1900 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1901 break; 1902 case MAT_STRUCTURALLY_SYMMETRIC: 1903 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1904 break; 1905 case MAT_HERMITIAN: 1906 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1907 break; 1908 case MAT_SYMMETRY_ETERNAL: 1909 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1910 break; 1911 default: 1912 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1913 } 1914 PetscFunctionReturn(0); 1915 } 1916 1917 #undef __FUNCT__ 1918 #define __FUNCT__ "MatGetRow_MPIAIJ" 1919 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1920 { 1921 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1922 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1923 PetscErrorCode ierr; 1924 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1925 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1926 PetscInt *cmap,*idx_p; 1927 1928 PetscFunctionBegin; 1929 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1930 mat->getrowactive = PETSC_TRUE; 1931 1932 if (!mat->rowvalues && (idx || v)) { 1933 /* 1934 allocate enough space to hold information from the longest row. 1935 */ 1936 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1937 PetscInt max = 1,tmp; 1938 for (i=0; i<matin->rmap->n; i++) { 1939 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1940 if (max < tmp) max = tmp; 1941 } 1942 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1943 } 1944 1945 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1946 lrow = row - rstart; 1947 1948 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1949 if (!v) {pvA = 0; pvB = 0;} 1950 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1951 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1952 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1953 nztot = nzA + nzB; 1954 1955 cmap = mat->garray; 1956 if (v || idx) { 1957 if (nztot) { 1958 /* Sort by increasing column numbers, assuming A and B already sorted */ 1959 PetscInt imark = -1; 1960 if (v) { 1961 *v = v_p = mat->rowvalues; 1962 for (i=0; i<nzB; i++) { 1963 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1964 else break; 1965 } 1966 imark = i; 1967 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1968 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1969 } 1970 if (idx) { 1971 *idx = idx_p = mat->rowindices; 1972 if (imark > -1) { 1973 for (i=0; i<imark; i++) { 1974 idx_p[i] = cmap[cworkB[i]]; 1975 } 1976 } else { 1977 for (i=0; i<nzB; i++) { 1978 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1979 else break; 1980 } 1981 imark = i; 1982 } 1983 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1984 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1985 } 1986 } else { 1987 if (idx) *idx = 0; 1988 if (v) *v = 0; 1989 } 1990 } 1991 *nz = nztot; 1992 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1993 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1994 PetscFunctionReturn(0); 1995 } 1996 1997 #undef __FUNCT__ 1998 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1999 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 2000 { 2001 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2002 2003 PetscFunctionBegin; 2004 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 2005 aij->getrowactive = PETSC_FALSE; 2006 PetscFunctionReturn(0); 2007 } 2008 2009 #undef __FUNCT__ 2010 #define __FUNCT__ "MatNorm_MPIAIJ" 2011 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 2012 { 2013 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2014 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 2015 PetscErrorCode ierr; 2016 PetscInt i,j,cstart = mat->cmap->rstart; 2017 PetscReal sum = 0.0; 2018 MatScalar *v; 2019 2020 PetscFunctionBegin; 2021 if (aij->size == 1) { 2022 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 2023 } else { 2024 if (type == NORM_FROBENIUS) { 2025 v = amat->a; 2026 for (i=0; i<amat->nz; i++) { 2027 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2028 } 2029 v = bmat->a; 2030 for (i=0; i<bmat->nz; i++) { 2031 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2032 } 2033 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2034 *norm = PetscSqrtReal(*norm); 2035 } else if (type == NORM_1) { /* max column norm */ 2036 PetscReal *tmp,*tmp2; 2037 PetscInt *jj,*garray = aij->garray; 2038 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 2039 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 2040 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 2041 *norm = 0.0; 2042 v = amat->a; jj = amat->j; 2043 for (j=0; j<amat->nz; j++) { 2044 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 2045 } 2046 v = bmat->a; jj = bmat->j; 2047 for (j=0; j<bmat->nz; j++) { 2048 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 2049 } 2050 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2051 for (j=0; j<mat->cmap->N; j++) { 2052 if (tmp2[j] > *norm) *norm = tmp2[j]; 2053 } 2054 ierr = PetscFree(tmp);CHKERRQ(ierr); 2055 ierr = PetscFree(tmp2);CHKERRQ(ierr); 2056 } else if (type == NORM_INFINITY) { /* max row norm */ 2057 PetscReal ntemp = 0.0; 2058 for (j=0; j<aij->A->rmap->n; j++) { 2059 v = amat->a + amat->i[j]; 2060 sum = 0.0; 2061 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2062 sum += PetscAbsScalar(*v); v++; 2063 } 2064 v = bmat->a + bmat->i[j]; 2065 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2066 sum += PetscAbsScalar(*v); v++; 2067 } 2068 if (sum > ntemp) ntemp = sum; 2069 } 2070 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2071 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 2072 } 2073 PetscFunctionReturn(0); 2074 } 2075 2076 #undef __FUNCT__ 2077 #define __FUNCT__ "MatTranspose_MPIAIJ" 2078 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2079 { 2080 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2081 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2082 PetscErrorCode ierr; 2083 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 2084 PetscInt cstart = A->cmap->rstart,ncol; 2085 Mat B; 2086 MatScalar *array; 2087 2088 PetscFunctionBegin; 2089 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2090 2091 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2092 ai = Aloc->i; aj = Aloc->j; 2093 bi = Bloc->i; bj = Bloc->j; 2094 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2095 PetscInt *d_nnz,*g_nnz,*o_nnz; 2096 PetscSFNode *oloc; 2097 PETSC_UNUSED PetscSF sf; 2098 2099 ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr); 2100 /* compute d_nnz for preallocation */ 2101 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2102 for (i=0; i<ai[ma]; i++) { 2103 d_nnz[aj[i]]++; 2104 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2105 } 2106 /* compute local off-diagonal contributions */ 2107 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 2108 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2109 /* map those to global */ 2110 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2111 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2112 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2113 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2114 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2115 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2116 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2117 2118 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2119 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2120 ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr); 2121 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2122 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2123 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2124 } else { 2125 B = *matout; 2126 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2127 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2128 } 2129 2130 /* copy over the A part */ 2131 array = Aloc->a; 2132 row = A->rmap->rstart; 2133 for (i=0; i<ma; i++) { 2134 ncol = ai[i+1]-ai[i]; 2135 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2136 row++; 2137 array += ncol; aj += ncol; 2138 } 2139 aj = Aloc->j; 2140 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2141 2142 /* copy over the B part */ 2143 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2144 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2145 array = Bloc->a; 2146 row = A->rmap->rstart; 2147 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2148 cols_tmp = cols; 2149 for (i=0; i<mb; i++) { 2150 ncol = bi[i+1]-bi[i]; 2151 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2152 row++; 2153 array += ncol; cols_tmp += ncol; 2154 } 2155 ierr = PetscFree(cols);CHKERRQ(ierr); 2156 2157 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2158 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2159 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2160 *matout = B; 2161 } else { 2162 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2163 } 2164 PetscFunctionReturn(0); 2165 } 2166 2167 #undef __FUNCT__ 2168 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2169 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2170 { 2171 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2172 Mat a = aij->A,b = aij->B; 2173 PetscErrorCode ierr; 2174 PetscInt s1,s2,s3; 2175 2176 PetscFunctionBegin; 2177 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2178 if (rr) { 2179 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2180 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2181 /* Overlap communication with computation. */ 2182 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2183 } 2184 if (ll) { 2185 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2186 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2187 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2188 } 2189 /* scale the diagonal block */ 2190 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2191 2192 if (rr) { 2193 /* Do a scatter end and then right scale the off-diagonal block */ 2194 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2195 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2196 } 2197 PetscFunctionReturn(0); 2198 } 2199 2200 #undef __FUNCT__ 2201 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2202 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2203 { 2204 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2205 PetscErrorCode ierr; 2206 2207 PetscFunctionBegin; 2208 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2209 PetscFunctionReturn(0); 2210 } 2211 2212 #undef __FUNCT__ 2213 #define __FUNCT__ "MatEqual_MPIAIJ" 2214 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2215 { 2216 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2217 Mat a,b,c,d; 2218 PetscBool flg; 2219 PetscErrorCode ierr; 2220 2221 PetscFunctionBegin; 2222 a = matA->A; b = matA->B; 2223 c = matB->A; d = matB->B; 2224 2225 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2226 if (flg) { 2227 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2228 } 2229 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2230 PetscFunctionReturn(0); 2231 } 2232 2233 #undef __FUNCT__ 2234 #define __FUNCT__ "MatCopy_MPIAIJ" 2235 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2236 { 2237 PetscErrorCode ierr; 2238 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2239 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2240 2241 PetscFunctionBegin; 2242 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2243 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2244 /* because of the column compression in the off-processor part of the matrix a->B, 2245 the number of columns in a->B and b->B may be different, hence we cannot call 2246 the MatCopy() directly on the two parts. If need be, we can provide a more 2247 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2248 then copying the submatrices */ 2249 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2250 } else { 2251 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2252 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2253 } 2254 PetscFunctionReturn(0); 2255 } 2256 2257 #undef __FUNCT__ 2258 #define __FUNCT__ "MatSetUp_MPIAIJ" 2259 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2260 { 2261 PetscErrorCode ierr; 2262 2263 PetscFunctionBegin; 2264 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2265 PetscFunctionReturn(0); 2266 } 2267 2268 #undef __FUNCT__ 2269 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2270 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2271 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2272 { 2273 PetscInt i,m=Y->rmap->N; 2274 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2275 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2276 const PetscInt *xi = x->i,*yi = y->i; 2277 2278 PetscFunctionBegin; 2279 /* Set the number of nonzeros in the new matrix */ 2280 for (i=0; i<m; i++) { 2281 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2282 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2283 nnz[i] = 0; 2284 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2285 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2286 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2287 nnz[i]++; 2288 } 2289 for (; k<nzy; k++) nnz[i]++; 2290 } 2291 PetscFunctionReturn(0); 2292 } 2293 2294 #undef __FUNCT__ 2295 #define __FUNCT__ "MatAXPY_MPIAIJ" 2296 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2297 { 2298 PetscErrorCode ierr; 2299 PetscInt i; 2300 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2301 PetscBLASInt bnz,one=1; 2302 Mat_SeqAIJ *x,*y; 2303 2304 PetscFunctionBegin; 2305 if (str == SAME_NONZERO_PATTERN) { 2306 PetscScalar alpha = a; 2307 x = (Mat_SeqAIJ*)xx->A->data; 2308 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2309 y = (Mat_SeqAIJ*)yy->A->data; 2310 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2311 x = (Mat_SeqAIJ*)xx->B->data; 2312 y = (Mat_SeqAIJ*)yy->B->data; 2313 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2314 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2315 } else if (str == SUBSET_NONZERO_PATTERN) { 2316 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2317 2318 x = (Mat_SeqAIJ*)xx->B->data; 2319 y = (Mat_SeqAIJ*)yy->B->data; 2320 if (y->xtoy && y->XtoY != xx->B) { 2321 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2322 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2323 } 2324 if (!y->xtoy) { /* get xtoy */ 2325 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2326 y->XtoY = xx->B; 2327 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2328 } 2329 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2330 } else { 2331 Mat B; 2332 PetscInt *nnz_d,*nnz_o; 2333 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2334 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2335 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2336 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2337 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2338 ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr); 2339 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2340 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2341 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2342 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2343 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2344 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2345 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2346 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2347 } 2348 PetscFunctionReturn(0); 2349 } 2350 2351 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2352 2353 #undef __FUNCT__ 2354 #define __FUNCT__ "MatConjugate_MPIAIJ" 2355 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2356 { 2357 #if defined(PETSC_USE_COMPLEX) 2358 PetscErrorCode ierr; 2359 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2360 2361 PetscFunctionBegin; 2362 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2363 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2364 #else 2365 PetscFunctionBegin; 2366 #endif 2367 PetscFunctionReturn(0); 2368 } 2369 2370 #undef __FUNCT__ 2371 #define __FUNCT__ "MatRealPart_MPIAIJ" 2372 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2373 { 2374 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2375 PetscErrorCode ierr; 2376 2377 PetscFunctionBegin; 2378 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2379 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2380 PetscFunctionReturn(0); 2381 } 2382 2383 #undef __FUNCT__ 2384 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2385 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2386 { 2387 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2388 PetscErrorCode ierr; 2389 2390 PetscFunctionBegin; 2391 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2392 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2393 PetscFunctionReturn(0); 2394 } 2395 2396 #if defined(PETSC_HAVE_PBGL) 2397 2398 #include <boost/parallel/mpi/bsp_process_group.hpp> 2399 #include <boost/graph/distributed/ilu_default_graph.hpp> 2400 #include <boost/graph/distributed/ilu_0_block.hpp> 2401 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2402 #include <boost/graph/distributed/petsc/interface.hpp> 2403 #include <boost/multi_array.hpp> 2404 #include <boost/parallel/distributed_property_map->hpp> 2405 2406 #undef __FUNCT__ 2407 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2408 /* 2409 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2410 */ 2411 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2412 { 2413 namespace petsc = boost::distributed::petsc; 2414 2415 namespace graph_dist = boost::graph::distributed; 2416 using boost::graph::distributed::ilu_default::process_group_type; 2417 using boost::graph::ilu_permuted; 2418 2419 PetscBool row_identity, col_identity; 2420 PetscContainer c; 2421 PetscInt m, n, M, N; 2422 PetscErrorCode ierr; 2423 2424 PetscFunctionBegin; 2425 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2426 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2427 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2428 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2429 2430 process_group_type pg; 2431 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2432 lgraph_type *lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2433 lgraph_type& level_graph = *lgraph_p; 2434 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2435 2436 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2437 ilu_permuted(level_graph); 2438 2439 /* put together the new matrix */ 2440 ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr); 2441 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2442 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2443 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2444 ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 2445 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2446 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2447 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2448 2449 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c); 2450 ierr = PetscContainerSetPointer(c, lgraph_p); 2451 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2452 ierr = PetscContainerDestroy(&c); 2453 PetscFunctionReturn(0); 2454 } 2455 2456 #undef __FUNCT__ 2457 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2458 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2459 { 2460 PetscFunctionBegin; 2461 PetscFunctionReturn(0); 2462 } 2463 2464 #undef __FUNCT__ 2465 #define __FUNCT__ "MatSolve_MPIAIJ" 2466 /* 2467 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2468 */ 2469 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2470 { 2471 namespace graph_dist = boost::graph::distributed; 2472 2473 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2474 lgraph_type *lgraph_p; 2475 PetscContainer c; 2476 PetscErrorCode ierr; 2477 2478 PetscFunctionBegin; 2479 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr); 2480 ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr); 2481 ierr = VecCopy(b, x);CHKERRQ(ierr); 2482 2483 PetscScalar *array_x; 2484 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2485 PetscInt sx; 2486 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2487 2488 PetscScalar *array_b; 2489 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2490 PetscInt sb; 2491 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2492 2493 lgraph_type& level_graph = *lgraph_p; 2494 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2495 2496 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2497 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]); 2498 array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]); 2499 2500 typedef boost::iterator_property_map<array_ref_type::iterator, 2501 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2502 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)); 2503 gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2504 2505 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2506 PetscFunctionReturn(0); 2507 } 2508 #endif 2509 2510 #undef __FUNCT__ 2511 #define __FUNCT__ "MatDestroy_MatRedundant" 2512 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2513 { 2514 PetscErrorCode ierr; 2515 Mat_Redundant *redund; 2516 PetscInt i; 2517 PetscMPIInt size; 2518 2519 PetscFunctionBegin; 2520 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2521 if (size == 1) { 2522 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2523 redund = a->redundant; 2524 } else { 2525 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2526 redund = a->redundant; 2527 } 2528 if (redund){ 2529 if (redund->matseq) { /* via MatGetSubMatrices() */ 2530 ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr); 2531 ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr); 2532 ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr); 2533 ierr = PetscFree(redund->matseq);CHKERRQ(ierr); 2534 } else { 2535 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2536 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2537 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2538 for (i=0; i<redund->nrecvs; i++) { 2539 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2540 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2541 } 2542 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2543 } 2544 2545 if (redund->psubcomm) { 2546 ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr); 2547 } 2548 ierr = redund->Destroy(A);CHKERRQ(ierr); 2549 ierr = PetscFree(redund);CHKERRQ(ierr); 2550 } 2551 PetscFunctionReturn(0); 2552 } 2553 2554 #undef __FUNCT__ 2555 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_psubcomm" 2556 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_psubcomm(Mat mat,PetscInt nsubcomm,PetscSubcomm psubcomm,MatReuse reuse,Mat *matredundant) 2557 { 2558 PetscMPIInt rank,size; 2559 MPI_Comm comm,subcomm=psubcomm->comm; 2560 PetscErrorCode ierr; 2561 PetscInt nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N; 2562 PetscMPIInt *send_rank= NULL,*recv_rank=NULL,subrank,subsize; 2563 PetscInt *rowrange = mat->rmap->range; 2564 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2565 Mat A = aij->A,B=aij->B,C=*matredundant; 2566 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2567 PetscScalar *sbuf_a; 2568 PetscInt nzlocal=a->nz+b->nz; 2569 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2570 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray; 2571 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2572 MatScalar *aworkA,*aworkB; 2573 PetscScalar *vals; 2574 PetscMPIInt tag1,tag2,tag3,imdex; 2575 MPI_Request *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL; 2576 MPI_Request *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL; 2577 MPI_Status recv_status,*send_status; 2578 PetscInt *sbuf_nz=NULL,*rbuf_nz=NULL,count; 2579 PetscInt **rbuf_j=NULL; 2580 PetscScalar **rbuf_a=NULL; 2581 Mat_Redundant *redund =NULL; 2582 2583 PetscFunctionBegin; 2584 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2585 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2586 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2587 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2588 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2589 2590 if (reuse == MAT_REUSE_MATRIX) { 2591 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2592 if (subsize == 1) { 2593 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2594 redund = c->redundant; 2595 } else { 2596 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2597 redund = c->redundant; 2598 } 2599 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2600 2601 nsends = redund->nsends; 2602 nrecvs = redund->nrecvs; 2603 send_rank = redund->send_rank; 2604 recv_rank = redund->recv_rank; 2605 sbuf_nz = redund->sbuf_nz; 2606 rbuf_nz = redund->rbuf_nz; 2607 sbuf_j = redund->sbuf_j; 2608 sbuf_a = redund->sbuf_a; 2609 rbuf_j = redund->rbuf_j; 2610 rbuf_a = redund->rbuf_a; 2611 } 2612 2613 if (reuse == MAT_INITIAL_MATRIX) { 2614 PetscInt nleftover,np_subcomm; 2615 2616 /* get the destination processors' id send_rank, nsends and nrecvs */ 2617 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr); 2618 2619 np_subcomm = size/nsubcomm; 2620 nleftover = size - nsubcomm*np_subcomm; 2621 2622 nsends = 0; nrecvs = 0; 2623 if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) { 2624 /* -------------------------------------------*/ 2625 for (i=0; i<size; i++) { 2626 if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */ 2627 send_rank[nsends] = i; nsends++; 2628 recv_rank[nrecvs++] = i; 2629 /* printf("[%d] send to and recv from [%d]\n",rank,i); */ 2630 } 2631 } 2632 if (rank >= size - nleftover) { /* this proc is a leftover processor */ 2633 i = size-nleftover-1; 2634 j = 0; 2635 while (j < nsubcomm - nleftover) { 2636 send_rank[nsends++] = i; 2637 i--; j++; 2638 /* printf("[%d] send to [%d]\n",rank,i); */ 2639 } 2640 } 2641 2642 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 2643 for (i=0; i<nleftover; i++) { 2644 recv_rank[nrecvs++] = size-nleftover+i; 2645 /* printf("[%d] recv from [%d]\n",rank,i); */ 2646 } 2647 } 2648 } else if (psubcomm->type == PETSC_SUBCOMM_CONTIGUOUS) { 2649 /* --------------------------------------------------*/ 2650 PetscInt color,subcommstart; 2651 subcommstart=0; 2652 for (color=0; color<nsubcomm; color++) { 2653 if (psubcomm->color != color) { 2654 for (i=0; i<psubcomm->subsize[color]; i++) { 2655 if (subrank == i) { /* my_subrank == other's subrank */ 2656 send_rank[nsends++] = subcommstart+i; 2657 recv_rank[nrecvs++] = subcommstart+i; 2658 /* printf("[%d] send to and recv from [%d]\n",rank,subcommstart+i); */ 2659 } 2660 } 2661 } 2662 subcommstart += psubcomm->subsize[color]; 2663 } 2664 if (nleftover && subrank == size/nsubcomm) { /* this proc is a leftover proc, send to subcomm that does not have leftover proc */ 2665 subcommstart=0; 2666 for (color=0; color<nsubcomm; color++) { 2667 subcommstart += psubcomm->subsize[color]; 2668 if (psubcomm->color == color) continue; 2669 if (psubcomm->subsize[color] == size/nsubcomm) { /* subcomm does not have leftover proc */ 2670 send_rank[nsends++] = subcommstart -1; /* send to the last proc of subcomm[color] */ 2671 /* printf("[%d] leftover send to [%d] \n",rank,subcommstart -1); */ 2672 } 2673 } 2674 } 2675 2676 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 2677 subcommstart=0; 2678 for (color=0; color<nsubcomm; color++) { 2679 subcommstart += psubcomm->subsize[color]; 2680 if (psubcomm->subsize[color] > size/nsubcomm) { /* subcomm has leftover proc */ 2681 recv_rank[nrecvs++] = subcommstart -1; /* recv from the last proc of subcomm[color] */ 2682 /* printf("[%d] recv from [%d]\n",rank,subcommstart -1); */ 2683 } 2684 } 2685 } 2686 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for PetscSubcomm type %D",psubcomm->type); 2687 2688 /* allocate sbuf_j, sbuf_a */ 2689 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2690 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2691 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2692 /* 2693 ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr); 2694 ierr = PetscSynchronizedFlush(comm);CHKERRQ(ierr); 2695 */ 2696 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2697 2698 /* copy mat's local entries into the buffers */ 2699 if (reuse == MAT_INITIAL_MATRIX) { 2700 rownz_max = 0; 2701 rptr = sbuf_j; 2702 cols = sbuf_j + rend-rstart + 1; 2703 vals = sbuf_a; 2704 rptr[0] = 0; 2705 for (i=0; i<rend-rstart; i++) { 2706 row = i + rstart; 2707 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2708 ncols = nzA + nzB; 2709 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2710 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2711 /* load the column indices for this row into cols */ 2712 lwrite = 0; 2713 for (l=0; l<nzB; l++) { 2714 if ((ctmp = bmap[cworkB[l]]) < cstart) { 2715 vals[lwrite] = aworkB[l]; 2716 cols[lwrite++] = ctmp; 2717 } 2718 } 2719 for (l=0; l<nzA; l++) { 2720 vals[lwrite] = aworkA[l]; 2721 cols[lwrite++] = cstart + cworkA[l]; 2722 } 2723 for (l=0; l<nzB; l++) { 2724 if ((ctmp = bmap[cworkB[l]]) >= cend) { 2725 vals[lwrite] = aworkB[l]; 2726 cols[lwrite++] = ctmp; 2727 } 2728 } 2729 vals += ncols; 2730 cols += ncols; 2731 rptr[i+1] = rptr[i] + ncols; 2732 if (rownz_max < ncols) rownz_max = ncols; 2733 } 2734 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2735 } else { /* only copy matrix values into sbuf_a */ 2736 rptr = sbuf_j; 2737 vals = sbuf_a; 2738 rptr[0] = 0; 2739 for (i=0; i<rend-rstart; i++) { 2740 row = i + rstart; 2741 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2742 ncols = nzA + nzB; 2743 cworkB = b->j + b->i[i]; 2744 aworkA = a->a + a->i[i]; 2745 aworkB = b->a + b->i[i]; 2746 lwrite = 0; 2747 for (l=0; l<nzB; l++) { 2748 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2749 } 2750 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2751 for (l=0; l<nzB; l++) { 2752 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2753 } 2754 vals += ncols; 2755 rptr[i+1] = rptr[i] + ncols; 2756 } 2757 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2758 2759 /* send nzlocal to others, and recv other's nzlocal */ 2760 /*--------------------------------------------------*/ 2761 if (reuse == MAT_INITIAL_MATRIX) { 2762 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2763 2764 s_waits2 = s_waits3 + nsends; 2765 s_waits1 = s_waits2 + nsends; 2766 r_waits1 = s_waits1 + nsends; 2767 r_waits2 = r_waits1 + nrecvs; 2768 r_waits3 = r_waits2 + nrecvs; 2769 } else { 2770 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2771 2772 r_waits3 = s_waits3 + nsends; 2773 } 2774 2775 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2776 if (reuse == MAT_INITIAL_MATRIX) { 2777 /* get new tags to keep the communication clean */ 2778 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2779 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2780 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2781 2782 /* post receives of other's nzlocal */ 2783 for (i=0; i<nrecvs; i++) { 2784 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2785 } 2786 /* send nzlocal to others */ 2787 for (i=0; i<nsends; i++) { 2788 sbuf_nz[i] = nzlocal; 2789 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2790 } 2791 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2792 count = nrecvs; 2793 while (count) { 2794 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2795 2796 recv_rank[imdex] = recv_status.MPI_SOURCE; 2797 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2798 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2799 2800 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2801 2802 rbuf_nz[imdex] += i + 2; 2803 2804 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2805 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2806 count--; 2807 } 2808 /* wait on sends of nzlocal */ 2809 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2810 /* send mat->i,j to others, and recv from other's */ 2811 /*------------------------------------------------*/ 2812 for (i=0; i<nsends; i++) { 2813 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2814 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2815 } 2816 /* wait on receives of mat->i,j */ 2817 /*------------------------------*/ 2818 count = nrecvs; 2819 while (count) { 2820 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2821 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2822 count--; 2823 } 2824 /* wait on sends of mat->i,j */ 2825 /*---------------------------*/ 2826 if (nsends) { 2827 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2828 } 2829 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2830 2831 /* post receives, send and receive mat->a */ 2832 /*----------------------------------------*/ 2833 for (imdex=0; imdex<nrecvs; imdex++) { 2834 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2835 } 2836 for (i=0; i<nsends; i++) { 2837 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2838 } 2839 count = nrecvs; 2840 while (count) { 2841 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2842 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2843 count--; 2844 } 2845 if (nsends) { 2846 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2847 } 2848 2849 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2850 2851 /* create redundant matrix */ 2852 /*-------------------------*/ 2853 if (reuse == MAT_INITIAL_MATRIX) { 2854 const PetscInt *range; 2855 PetscInt rstart_sub,rend_sub,mloc_sub; 2856 2857 /* compute rownz_max for preallocation */ 2858 for (imdex=0; imdex<nrecvs; imdex++) { 2859 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2860 rptr = rbuf_j[imdex]; 2861 for (i=0; i<j; i++) { 2862 ncols = rptr[i+1] - rptr[i]; 2863 if (rownz_max < ncols) rownz_max = ncols; 2864 } 2865 } 2866 2867 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2868 2869 /* get local size of redundant matrix 2870 - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED, works for other types, but may not efficient! */ 2871 ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr); 2872 rstart_sub = range[nsubcomm*subrank]; 2873 if (subrank+1 < subsize) { /* not the last proc in subcomm */ 2874 rend_sub = range[nsubcomm*(subrank+1)]; 2875 } else { 2876 rend_sub = mat->rmap->N; 2877 } 2878 mloc_sub = rend_sub - rstart_sub; 2879 2880 if (M == N) { 2881 ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2882 } else { /* non-square matrix */ 2883 ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr); 2884 } 2885 ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr); 2886 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2887 ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr); 2888 ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr); 2889 } else { 2890 C = *matredundant; 2891 } 2892 2893 /* insert local matrix entries */ 2894 rptr = sbuf_j; 2895 cols = sbuf_j + rend-rstart + 1; 2896 vals = sbuf_a; 2897 for (i=0; i<rend-rstart; i++) { 2898 row = i + rstart; 2899 ncols = rptr[i+1] - rptr[i]; 2900 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2901 vals += ncols; 2902 cols += ncols; 2903 } 2904 /* insert received matrix entries */ 2905 for (imdex=0; imdex<nrecvs; imdex++) { 2906 rstart = rowrange[recv_rank[imdex]]; 2907 rend = rowrange[recv_rank[imdex]+1]; 2908 /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */ 2909 rptr = rbuf_j[imdex]; 2910 cols = rbuf_j[imdex] + rend-rstart + 1; 2911 vals = rbuf_a[imdex]; 2912 for (i=0; i<rend-rstart; i++) { 2913 row = i + rstart; 2914 ncols = rptr[i+1] - rptr[i]; 2915 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2916 vals += ncols; 2917 cols += ncols; 2918 } 2919 } 2920 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2921 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2922 2923 if (reuse == MAT_INITIAL_MATRIX) { 2924 *matredundant = C; 2925 2926 /* create a supporting struct and attach it to C for reuse */ 2927 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2928 if (subsize == 1) { 2929 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2930 c->redundant = redund; 2931 } else { 2932 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2933 c->redundant = redund; 2934 } 2935 2936 redund->nzlocal = nzlocal; 2937 redund->nsends = nsends; 2938 redund->nrecvs = nrecvs; 2939 redund->send_rank = send_rank; 2940 redund->recv_rank = recv_rank; 2941 redund->sbuf_nz = sbuf_nz; 2942 redund->rbuf_nz = rbuf_nz; 2943 redund->sbuf_j = sbuf_j; 2944 redund->sbuf_a = sbuf_a; 2945 redund->rbuf_j = rbuf_j; 2946 redund->rbuf_a = rbuf_a; 2947 redund->psubcomm = NULL; 2948 2949 redund->Destroy = C->ops->destroy; 2950 C->ops->destroy = MatDestroy_MatRedundant; 2951 } 2952 PetscFunctionReturn(0); 2953 } 2954 2955 #undef __FUNCT__ 2956 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2957 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscSubcomm psubcomm,MatReuse reuse,Mat *matredundant) 2958 { 2959 PetscErrorCode ierr; 2960 MPI_Comm comm; 2961 PetscMPIInt size,subsize; 2962 PetscInt mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N; 2963 Mat_Redundant *redund =NULL; 2964 PetscSubcomm psubcomm_in=psubcomm; 2965 MPI_Comm subcomm_in=subcomm; 2966 2967 PetscFunctionBegin; 2968 if (subcomm_in == MPI_COMM_NULL) { /* use psubcomm */ 2969 if (reuse == MAT_INITIAL_MATRIX) { 2970 if (psubcomm_in == NULL) { /* user does not provide psubcomm, create it here */ 2971 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2972 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2973 ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr); 2974 ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr); 2975 ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr); 2976 ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr); 2977 } 2978 subcomm = psubcomm->comm; 2979 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2980 } else { /* retrieve psubcomm */ 2981 if (psubcomm_in == NULL) { /* user does not provide psubcomm */ 2982 ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr); 2983 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2984 if (subsize == 1) { 2985 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2986 psubcomm = c->redundant->psubcomm; 2987 } else { 2988 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2989 psubcomm = c->redundant->psubcomm; 2990 } 2991 } else { /* use psubcomm_in */ 2992 subcomm = psubcomm_in->comm; 2993 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2994 } 2995 } 2996 } 2997 2998 if (subcomm_in==NULL && psubcomm->type == PETSC_SUBCOMM_INTERLACED) { 2999 ierr = MatGetRedundantMatrix_MPIAIJ_psubcomm(mat,nsubcomm,psubcomm,reuse,matredundant);CHKERRQ(ierr); 3000 } else { 3001 /* via MatGetSubMatrices() */ 3002 Mat *matseq; 3003 IS isrow,iscol; 3004 3005 if (reuse == MAT_INITIAL_MATRIX) { 3006 /* create a local sequential matrix matseq[0] */ 3007 mloc_sub = PETSC_DECIDE; 3008 ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr); 3009 ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr); 3010 rstart = rend - mloc_sub; 3011 ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr); 3012 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr); 3013 } else { /* reuse == MAT_REUSE_MATRIX */ 3014 if (subsize == 1) { 3015 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 3016 redund = c->redundant; 3017 } else { 3018 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 3019 redund = c->redundant; 3020 } 3021 3022 isrow = redund->isrow; 3023 iscol = redund->iscol; 3024 matseq = redund->matseq; 3025 } 3026 3027 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr); 3028 ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr); 3029 3030 if (reuse == MAT_INITIAL_MATRIX) { 3031 /* create a supporting struct and attach it to C for reuse */ 3032 ierr = PetscNewLog(*matredundant,Mat_Redundant,&redund);CHKERRQ(ierr); 3033 if (subsize == 1) { 3034 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 3035 c->redundant = redund; 3036 } else { 3037 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 3038 c->redundant = redund; 3039 } 3040 redund->isrow = isrow; 3041 redund->iscol = iscol; 3042 redund->matseq = matseq; 3043 redund->psubcomm = NULL; 3044 redund->Destroy = (*matredundant)->ops->destroy; 3045 (*matredundant)->ops->destroy = MatDestroy_MatRedundant; 3046 } 3047 } 3048 3049 if (psubcomm && psubcomm_in==NULL) { 3050 /* if psubcomm is created in this routine, free it in MatDestroy_MatRedundant() */ 3051 ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr); 3052 if (subsize == 1) { 3053 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 3054 c->redundant->psubcomm = psubcomm; 3055 } else { 3056 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 3057 c->redundant->psubcomm = psubcomm ; 3058 } 3059 } 3060 PetscFunctionReturn(0); 3061 } 3062 3063 #undef __FUNCT__ 3064 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 3065 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 3066 { 3067 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3068 PetscErrorCode ierr; 3069 PetscInt i,*idxb = 0; 3070 PetscScalar *va,*vb; 3071 Vec vtmp; 3072 3073 PetscFunctionBegin; 3074 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 3075 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 3076 if (idx) { 3077 for (i=0; i<A->rmap->n; i++) { 3078 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 3079 } 3080 } 3081 3082 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 3083 if (idx) { 3084 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 3085 } 3086 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 3087 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 3088 3089 for (i=0; i<A->rmap->n; i++) { 3090 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 3091 va[i] = vb[i]; 3092 if (idx) idx[i] = a->garray[idxb[i]]; 3093 } 3094 } 3095 3096 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 3097 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 3098 ierr = PetscFree(idxb);CHKERRQ(ierr); 3099 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 3100 PetscFunctionReturn(0); 3101 } 3102 3103 #undef __FUNCT__ 3104 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 3105 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 3106 { 3107 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3108 PetscErrorCode ierr; 3109 PetscInt i,*idxb = 0; 3110 PetscScalar *va,*vb; 3111 Vec vtmp; 3112 3113 PetscFunctionBegin; 3114 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 3115 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 3116 if (idx) { 3117 for (i=0; i<A->cmap->n; i++) { 3118 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 3119 } 3120 } 3121 3122 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 3123 if (idx) { 3124 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 3125 } 3126 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 3127 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 3128 3129 for (i=0; i<A->rmap->n; i++) { 3130 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 3131 va[i] = vb[i]; 3132 if (idx) idx[i] = a->garray[idxb[i]]; 3133 } 3134 } 3135 3136 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 3137 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 3138 ierr = PetscFree(idxb);CHKERRQ(ierr); 3139 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 3140 PetscFunctionReturn(0); 3141 } 3142 3143 #undef __FUNCT__ 3144 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 3145 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 3146 { 3147 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 3148 PetscInt n = A->rmap->n; 3149 PetscInt cstart = A->cmap->rstart; 3150 PetscInt *cmap = mat->garray; 3151 PetscInt *diagIdx, *offdiagIdx; 3152 Vec diagV, offdiagV; 3153 PetscScalar *a, *diagA, *offdiagA; 3154 PetscInt r; 3155 PetscErrorCode ierr; 3156 3157 PetscFunctionBegin; 3158 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 3159 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 3160 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 3161 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 3162 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 3163 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 3164 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 3165 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3166 for (r = 0; r < n; ++r) { 3167 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 3168 a[r] = diagA[r]; 3169 idx[r] = cstart + diagIdx[r]; 3170 } else { 3171 a[r] = offdiagA[r]; 3172 idx[r] = cmap[offdiagIdx[r]]; 3173 } 3174 } 3175 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3176 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3177 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3178 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3179 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3180 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3181 PetscFunctionReturn(0); 3182 } 3183 3184 #undef __FUNCT__ 3185 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 3186 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 3187 { 3188 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 3189 PetscInt n = A->rmap->n; 3190 PetscInt cstart = A->cmap->rstart; 3191 PetscInt *cmap = mat->garray; 3192 PetscInt *diagIdx, *offdiagIdx; 3193 Vec diagV, offdiagV; 3194 PetscScalar *a, *diagA, *offdiagA; 3195 PetscInt r; 3196 PetscErrorCode ierr; 3197 3198 PetscFunctionBegin; 3199 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 3200 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 3201 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 3202 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 3203 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 3204 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 3205 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 3206 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3207 for (r = 0; r < n; ++r) { 3208 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 3209 a[r] = diagA[r]; 3210 idx[r] = cstart + diagIdx[r]; 3211 } else { 3212 a[r] = offdiagA[r]; 3213 idx[r] = cmap[offdiagIdx[r]]; 3214 } 3215 } 3216 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3217 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3218 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3219 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3220 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3221 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3222 PetscFunctionReturn(0); 3223 } 3224 3225 #undef __FUNCT__ 3226 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 3227 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 3228 { 3229 PetscErrorCode ierr; 3230 Mat *dummy; 3231 3232 PetscFunctionBegin; 3233 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 3234 *newmat = *dummy; 3235 ierr = PetscFree(dummy);CHKERRQ(ierr); 3236 PetscFunctionReturn(0); 3237 } 3238 3239 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 3240 3241 #undef __FUNCT__ 3242 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3243 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3244 { 3245 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3246 PetscErrorCode ierr; 3247 3248 PetscFunctionBegin; 3249 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3250 PetscFunctionReturn(0); 3251 } 3252 3253 #undef __FUNCT__ 3254 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3255 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3256 { 3257 PetscErrorCode ierr; 3258 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3259 3260 PetscFunctionBegin; 3261 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3262 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3263 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3264 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3265 PetscFunctionReturn(0); 3266 } 3267 3268 /* -------------------------------------------------------------------*/ 3269 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3270 MatGetRow_MPIAIJ, 3271 MatRestoreRow_MPIAIJ, 3272 MatMult_MPIAIJ, 3273 /* 4*/ MatMultAdd_MPIAIJ, 3274 MatMultTranspose_MPIAIJ, 3275 MatMultTransposeAdd_MPIAIJ, 3276 #if defined(PETSC_HAVE_PBGL) 3277 MatSolve_MPIAIJ, 3278 #else 3279 0, 3280 #endif 3281 0, 3282 0, 3283 /*10*/ 0, 3284 0, 3285 0, 3286 MatSOR_MPIAIJ, 3287 MatTranspose_MPIAIJ, 3288 /*15*/ MatGetInfo_MPIAIJ, 3289 MatEqual_MPIAIJ, 3290 MatGetDiagonal_MPIAIJ, 3291 MatDiagonalScale_MPIAIJ, 3292 MatNorm_MPIAIJ, 3293 /*20*/ MatAssemblyBegin_MPIAIJ, 3294 MatAssemblyEnd_MPIAIJ, 3295 MatSetOption_MPIAIJ, 3296 MatZeroEntries_MPIAIJ, 3297 /*24*/ MatZeroRows_MPIAIJ, 3298 0, 3299 #if defined(PETSC_HAVE_PBGL) 3300 0, 3301 #else 3302 0, 3303 #endif 3304 0, 3305 0, 3306 /*29*/ MatSetUp_MPIAIJ, 3307 #if defined(PETSC_HAVE_PBGL) 3308 0, 3309 #else 3310 0, 3311 #endif 3312 0, 3313 0, 3314 0, 3315 /*34*/ MatDuplicate_MPIAIJ, 3316 0, 3317 0, 3318 0, 3319 0, 3320 /*39*/ MatAXPY_MPIAIJ, 3321 MatGetSubMatrices_MPIAIJ, 3322 MatIncreaseOverlap_MPIAIJ, 3323 MatGetValues_MPIAIJ, 3324 MatCopy_MPIAIJ, 3325 /*44*/ MatGetRowMax_MPIAIJ, 3326 MatScale_MPIAIJ, 3327 0, 3328 0, 3329 MatZeroRowsColumns_MPIAIJ, 3330 /*49*/ MatSetRandom_MPIAIJ, 3331 0, 3332 0, 3333 0, 3334 0, 3335 /*54*/ MatFDColoringCreate_MPIAIJ, 3336 0, 3337 MatSetUnfactored_MPIAIJ, 3338 MatPermute_MPIAIJ, 3339 0, 3340 /*59*/ MatGetSubMatrix_MPIAIJ, 3341 MatDestroy_MPIAIJ, 3342 MatView_MPIAIJ, 3343 0, 3344 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 3345 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 3346 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 3347 0, 3348 0, 3349 0, 3350 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3351 MatGetRowMinAbs_MPIAIJ, 3352 0, 3353 MatSetColoring_MPIAIJ, 3354 0, 3355 MatSetValuesAdifor_MPIAIJ, 3356 /*75*/ MatFDColoringApply_AIJ, 3357 0, 3358 0, 3359 0, 3360 MatFindZeroDiagonals_MPIAIJ, 3361 /*80*/ 0, 3362 0, 3363 0, 3364 /*83*/ MatLoad_MPIAIJ, 3365 0, 3366 0, 3367 0, 3368 0, 3369 0, 3370 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3371 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3372 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3373 MatPtAP_MPIAIJ_MPIAIJ, 3374 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3375 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 3376 0, 3377 0, 3378 0, 3379 0, 3380 /*99*/ 0, 3381 0, 3382 0, 3383 MatConjugate_MPIAIJ, 3384 0, 3385 /*104*/MatSetValuesRow_MPIAIJ, 3386 MatRealPart_MPIAIJ, 3387 MatImaginaryPart_MPIAIJ, 3388 0, 3389 0, 3390 /*109*/0, 3391 MatGetRedundantMatrix_MPIAIJ, 3392 MatGetRowMin_MPIAIJ, 3393 0, 3394 0, 3395 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3396 0, 3397 0, 3398 0, 3399 0, 3400 /*119*/0, 3401 0, 3402 0, 3403 0, 3404 MatGetMultiProcBlock_MPIAIJ, 3405 /*124*/MatFindNonzeroRows_MPIAIJ, 3406 MatGetColumnNorms_MPIAIJ, 3407 MatInvertBlockDiagonal_MPIAIJ, 3408 0, 3409 MatGetSubMatricesParallel_MPIAIJ, 3410 /*129*/0, 3411 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3412 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3413 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3414 0, 3415 /*134*/0, 3416 0, 3417 0, 3418 0, 3419 0, 3420 /*139*/0, 3421 0 3422 }; 3423 3424 /* ----------------------------------------------------------------------------------------*/ 3425 3426 #undef __FUNCT__ 3427 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3428 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3429 { 3430 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3431 PetscErrorCode ierr; 3432 3433 PetscFunctionBegin; 3434 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3435 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3436 PetscFunctionReturn(0); 3437 } 3438 3439 #undef __FUNCT__ 3440 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3441 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3442 { 3443 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3444 PetscErrorCode ierr; 3445 3446 PetscFunctionBegin; 3447 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3448 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3449 PetscFunctionReturn(0); 3450 } 3451 3452 #undef __FUNCT__ 3453 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3454 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3455 { 3456 Mat_MPIAIJ *b; 3457 PetscErrorCode ierr; 3458 3459 PetscFunctionBegin; 3460 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3461 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3462 b = (Mat_MPIAIJ*)B->data; 3463 3464 if (!B->preallocated) { 3465 /* Explicitly create 2 MATSEQAIJ matrices. */ 3466 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3467 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3468 ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3469 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3470 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3471 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3472 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3473 ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3474 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3475 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3476 } 3477 3478 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3479 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3480 B->preallocated = PETSC_TRUE; 3481 PetscFunctionReturn(0); 3482 } 3483 3484 #undef __FUNCT__ 3485 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3486 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3487 { 3488 Mat mat; 3489 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3490 PetscErrorCode ierr; 3491 3492 PetscFunctionBegin; 3493 *newmat = 0; 3494 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3495 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3496 ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr); 3497 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3498 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3499 a = (Mat_MPIAIJ*)mat->data; 3500 3501 mat->factortype = matin->factortype; 3502 mat->rmap->bs = matin->rmap->bs; 3503 mat->cmap->bs = matin->cmap->bs; 3504 mat->assembled = PETSC_TRUE; 3505 mat->insertmode = NOT_SET_VALUES; 3506 mat->preallocated = PETSC_TRUE; 3507 3508 a->size = oldmat->size; 3509 a->rank = oldmat->rank; 3510 a->donotstash = oldmat->donotstash; 3511 a->roworiented = oldmat->roworiented; 3512 a->rowindices = 0; 3513 a->rowvalues = 0; 3514 a->getrowactive = PETSC_FALSE; 3515 3516 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3517 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3518 3519 if (oldmat->colmap) { 3520 #if defined(PETSC_USE_CTABLE) 3521 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3522 #else 3523 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3524 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3525 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3526 #endif 3527 } else a->colmap = 0; 3528 if (oldmat->garray) { 3529 PetscInt len; 3530 len = oldmat->B->cmap->n; 3531 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3532 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3533 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3534 } else a->garray = 0; 3535 3536 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3537 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3538 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3539 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3540 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3541 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3542 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3543 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3544 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3545 *newmat = mat; 3546 PetscFunctionReturn(0); 3547 } 3548 3549 3550 3551 #undef __FUNCT__ 3552 #define __FUNCT__ "MatLoad_MPIAIJ" 3553 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3554 { 3555 PetscScalar *vals,*svals; 3556 MPI_Comm comm; 3557 PetscErrorCode ierr; 3558 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3559 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3560 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3561 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 3562 PetscInt cend,cstart,n,*rowners,sizesset=1; 3563 int fd; 3564 PetscInt bs = 1; 3565 3566 PetscFunctionBegin; 3567 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3568 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3569 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3570 if (!rank) { 3571 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3572 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3573 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3574 } 3575 3576 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3577 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3578 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3579 3580 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3581 3582 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3583 M = header[1]; N = header[2]; 3584 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3585 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3586 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3587 3588 /* If global sizes are set, check if they are consistent with that given in the file */ 3589 if (sizesset) { 3590 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3591 } 3592 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3593 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3594 3595 /* determine ownership of all (block) rows */ 3596 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3597 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3598 else m = newMat->rmap->n; /* Set by user */ 3599 3600 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3601 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3602 3603 /* First process needs enough room for process with most rows */ 3604 if (!rank) { 3605 mmax = rowners[1]; 3606 for (i=2; i<=size; i++) { 3607 mmax = PetscMax(mmax, rowners[i]); 3608 } 3609 } else mmax = -1; /* unused, but compilers complain */ 3610 3611 rowners[0] = 0; 3612 for (i=2; i<=size; i++) { 3613 rowners[i] += rowners[i-1]; 3614 } 3615 rstart = rowners[rank]; 3616 rend = rowners[rank+1]; 3617 3618 /* distribute row lengths to all processors */ 3619 ierr = PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);CHKERRQ(ierr); 3620 if (!rank) { 3621 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3622 ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3623 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3624 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3625 for (j=0; j<m; j++) { 3626 procsnz[0] += ourlens[j]; 3627 } 3628 for (i=1; i<size; i++) { 3629 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3630 /* calculate the number of nonzeros on each processor */ 3631 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3632 procsnz[i] += rowlengths[j]; 3633 } 3634 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3635 } 3636 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3637 } else { 3638 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3639 } 3640 3641 if (!rank) { 3642 /* determine max buffer needed and allocate it */ 3643 maxnz = 0; 3644 for (i=0; i<size; i++) { 3645 maxnz = PetscMax(maxnz,procsnz[i]); 3646 } 3647 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3648 3649 /* read in my part of the matrix column indices */ 3650 nz = procsnz[0]; 3651 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3652 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3653 3654 /* read in every one elses and ship off */ 3655 for (i=1; i<size; i++) { 3656 nz = procsnz[i]; 3657 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3658 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3659 } 3660 ierr = PetscFree(cols);CHKERRQ(ierr); 3661 } else { 3662 /* determine buffer space needed for message */ 3663 nz = 0; 3664 for (i=0; i<m; i++) { 3665 nz += ourlens[i]; 3666 } 3667 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3668 3669 /* receive message of column indices*/ 3670 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3671 } 3672 3673 /* determine column ownership if matrix is not square */ 3674 if (N != M) { 3675 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3676 else n = newMat->cmap->n; 3677 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3678 cstart = cend - n; 3679 } else { 3680 cstart = rstart; 3681 cend = rend; 3682 n = cend - cstart; 3683 } 3684 3685 /* loop over local rows, determining number of off diagonal entries */ 3686 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3687 jj = 0; 3688 for (i=0; i<m; i++) { 3689 for (j=0; j<ourlens[i]; j++) { 3690 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3691 jj++; 3692 } 3693 } 3694 3695 for (i=0; i<m; i++) { 3696 ourlens[i] -= offlens[i]; 3697 } 3698 if (!sizesset) { 3699 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3700 } 3701 3702 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3703 3704 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3705 3706 for (i=0; i<m; i++) { 3707 ourlens[i] += offlens[i]; 3708 } 3709 3710 if (!rank) { 3711 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3712 3713 /* read in my part of the matrix numerical values */ 3714 nz = procsnz[0]; 3715 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3716 3717 /* insert into matrix */ 3718 jj = rstart; 3719 smycols = mycols; 3720 svals = vals; 3721 for (i=0; i<m; i++) { 3722 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3723 smycols += ourlens[i]; 3724 svals += ourlens[i]; 3725 jj++; 3726 } 3727 3728 /* read in other processors and ship out */ 3729 for (i=1; i<size; i++) { 3730 nz = procsnz[i]; 3731 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3732 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3733 } 3734 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3735 } else { 3736 /* receive numeric values */ 3737 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3738 3739 /* receive message of values*/ 3740 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3741 3742 /* insert into matrix */ 3743 jj = rstart; 3744 smycols = mycols; 3745 svals = vals; 3746 for (i=0; i<m; i++) { 3747 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3748 smycols += ourlens[i]; 3749 svals += ourlens[i]; 3750 jj++; 3751 } 3752 } 3753 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3754 ierr = PetscFree(vals);CHKERRQ(ierr); 3755 ierr = PetscFree(mycols);CHKERRQ(ierr); 3756 ierr = PetscFree(rowners);CHKERRQ(ierr); 3757 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3758 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3759 PetscFunctionReturn(0); 3760 } 3761 3762 #undef __FUNCT__ 3763 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3764 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3765 { 3766 PetscErrorCode ierr; 3767 IS iscol_local; 3768 PetscInt csize; 3769 3770 PetscFunctionBegin; 3771 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3772 if (call == MAT_REUSE_MATRIX) { 3773 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3774 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3775 } else { 3776 PetscInt cbs; 3777 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3778 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3779 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3780 } 3781 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3782 if (call == MAT_INITIAL_MATRIX) { 3783 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3784 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3785 } 3786 PetscFunctionReturn(0); 3787 } 3788 3789 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3790 #undef __FUNCT__ 3791 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3792 /* 3793 Not great since it makes two copies of the submatrix, first an SeqAIJ 3794 in local and then by concatenating the local matrices the end result. 3795 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3796 3797 Note: This requires a sequential iscol with all indices. 3798 */ 3799 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3800 { 3801 PetscErrorCode ierr; 3802 PetscMPIInt rank,size; 3803 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3804 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3805 PetscBool allcolumns, colflag; 3806 Mat M,Mreuse; 3807 MatScalar *vwork,*aa; 3808 MPI_Comm comm; 3809 Mat_SeqAIJ *aij; 3810 3811 PetscFunctionBegin; 3812 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3813 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3814 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3815 3816 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3817 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3818 if (colflag && ncol == mat->cmap->N) { 3819 allcolumns = PETSC_TRUE; 3820 } else { 3821 allcolumns = PETSC_FALSE; 3822 } 3823 if (call == MAT_REUSE_MATRIX) { 3824 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3825 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3826 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3827 } else { 3828 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3829 } 3830 3831 /* 3832 m - number of local rows 3833 n - number of columns (same on all processors) 3834 rstart - first row in new global matrix generated 3835 */ 3836 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3837 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3838 if (call == MAT_INITIAL_MATRIX) { 3839 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3840 ii = aij->i; 3841 jj = aij->j; 3842 3843 /* 3844 Determine the number of non-zeros in the diagonal and off-diagonal 3845 portions of the matrix in order to do correct preallocation 3846 */ 3847 3848 /* first get start and end of "diagonal" columns */ 3849 if (csize == PETSC_DECIDE) { 3850 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3851 if (mglobal == n) { /* square matrix */ 3852 nlocal = m; 3853 } else { 3854 nlocal = n/size + ((n % size) > rank); 3855 } 3856 } else { 3857 nlocal = csize; 3858 } 3859 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3860 rstart = rend - nlocal; 3861 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3862 3863 /* next, compute all the lengths */ 3864 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3865 olens = dlens + m; 3866 for (i=0; i<m; i++) { 3867 jend = ii[i+1] - ii[i]; 3868 olen = 0; 3869 dlen = 0; 3870 for (j=0; j<jend; j++) { 3871 if (*jj < rstart || *jj >= rend) olen++; 3872 else dlen++; 3873 jj++; 3874 } 3875 olens[i] = olen; 3876 dlens[i] = dlen; 3877 } 3878 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3879 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3880 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3881 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3882 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3883 ierr = PetscFree(dlens);CHKERRQ(ierr); 3884 } else { 3885 PetscInt ml,nl; 3886 3887 M = *newmat; 3888 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3889 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3890 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3891 /* 3892 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3893 rather than the slower MatSetValues(). 3894 */ 3895 M->was_assembled = PETSC_TRUE; 3896 M->assembled = PETSC_FALSE; 3897 } 3898 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3899 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3900 ii = aij->i; 3901 jj = aij->j; 3902 aa = aij->a; 3903 for (i=0; i<m; i++) { 3904 row = rstart + i; 3905 nz = ii[i+1] - ii[i]; 3906 cwork = jj; jj += nz; 3907 vwork = aa; aa += nz; 3908 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3909 } 3910 3911 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3912 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3913 *newmat = M; 3914 3915 /* save submatrix used in processor for next request */ 3916 if (call == MAT_INITIAL_MATRIX) { 3917 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3918 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3919 } 3920 PetscFunctionReturn(0); 3921 } 3922 3923 #undef __FUNCT__ 3924 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3925 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3926 { 3927 PetscInt m,cstart, cend,j,nnz,i,d; 3928 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3929 const PetscInt *JJ; 3930 PetscScalar *values; 3931 PetscErrorCode ierr; 3932 3933 PetscFunctionBegin; 3934 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3935 3936 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3937 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3938 m = B->rmap->n; 3939 cstart = B->cmap->rstart; 3940 cend = B->cmap->rend; 3941 rstart = B->rmap->rstart; 3942 3943 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3944 3945 #if defined(PETSC_USE_DEBUGGING) 3946 for (i=0; i<m; i++) { 3947 nnz = Ii[i+1]- Ii[i]; 3948 JJ = J + Ii[i]; 3949 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3950 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 3951 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3952 } 3953 #endif 3954 3955 for (i=0; i<m; i++) { 3956 nnz = Ii[i+1]- Ii[i]; 3957 JJ = J + Ii[i]; 3958 nnz_max = PetscMax(nnz_max,nnz); 3959 d = 0; 3960 for (j=0; j<nnz; j++) { 3961 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3962 } 3963 d_nnz[i] = d; 3964 o_nnz[i] = nnz - d; 3965 } 3966 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3967 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3968 3969 if (v) values = (PetscScalar*)v; 3970 else { 3971 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3972 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3973 } 3974 3975 for (i=0; i<m; i++) { 3976 ii = i + rstart; 3977 nnz = Ii[i+1]- Ii[i]; 3978 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3979 } 3980 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3981 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3982 3983 if (!v) { 3984 ierr = PetscFree(values);CHKERRQ(ierr); 3985 } 3986 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3987 PetscFunctionReturn(0); 3988 } 3989 3990 #undef __FUNCT__ 3991 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3992 /*@ 3993 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3994 (the default parallel PETSc format). 3995 3996 Collective on MPI_Comm 3997 3998 Input Parameters: 3999 + B - the matrix 4000 . i - the indices into j for the start of each local row (starts with zero) 4001 . j - the column indices for each local row (starts with zero) 4002 - v - optional values in the matrix 4003 4004 Level: developer 4005 4006 Notes: 4007 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4008 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4009 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4010 4011 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4012 4013 The format which is used for the sparse matrix input, is equivalent to a 4014 row-major ordering.. i.e for the following matrix, the input data expected is 4015 as shown: 4016 4017 1 0 0 4018 2 0 3 P0 4019 ------- 4020 4 5 6 P1 4021 4022 Process0 [P0]: rows_owned=[0,1] 4023 i = {0,1,3} [size = nrow+1 = 2+1] 4024 j = {0,0,2} [size = nz = 6] 4025 v = {1,2,3} [size = nz = 6] 4026 4027 Process1 [P1]: rows_owned=[2] 4028 i = {0,3} [size = nrow+1 = 1+1] 4029 j = {0,1,2} [size = nz = 6] 4030 v = {4,5,6} [size = nz = 6] 4031 4032 .keywords: matrix, aij, compressed row, sparse, parallel 4033 4034 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 4035 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 4036 @*/ 4037 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 4038 { 4039 PetscErrorCode ierr; 4040 4041 PetscFunctionBegin; 4042 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 4043 PetscFunctionReturn(0); 4044 } 4045 4046 #undef __FUNCT__ 4047 #define __FUNCT__ "MatMPIAIJSetPreallocation" 4048 /*@C 4049 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 4050 (the default parallel PETSc format). For good matrix assembly performance 4051 the user should preallocate the matrix storage by setting the parameters 4052 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4053 performance can be increased by more than a factor of 50. 4054 4055 Collective on MPI_Comm 4056 4057 Input Parameters: 4058 + A - the matrix 4059 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4060 (same value is used for all local rows) 4061 . d_nnz - array containing the number of nonzeros in the various rows of the 4062 DIAGONAL portion of the local submatrix (possibly different for each row) 4063 or NULL, if d_nz is used to specify the nonzero structure. 4064 The size of this array is equal to the number of local rows, i.e 'm'. 4065 For matrices that will be factored, you must leave room for (and set) 4066 the diagonal entry even if it is zero. 4067 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4068 submatrix (same value is used for all local rows). 4069 - o_nnz - array containing the number of nonzeros in the various rows of the 4070 OFF-DIAGONAL portion of the local submatrix (possibly different for 4071 each row) or NULL, if o_nz is used to specify the nonzero 4072 structure. The size of this array is equal to the number 4073 of local rows, i.e 'm'. 4074 4075 If the *_nnz parameter is given then the *_nz parameter is ignored 4076 4077 The AIJ format (also called the Yale sparse matrix format or 4078 compressed row storage (CSR)), is fully compatible with standard Fortran 77 4079 storage. The stored row and column indices begin with zero. 4080 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 4081 4082 The parallel matrix is partitioned such that the first m0 rows belong to 4083 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4084 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 4085 4086 The DIAGONAL portion of the local submatrix of a processor can be defined 4087 as the submatrix which is obtained by extraction the part corresponding to 4088 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4089 first row that belongs to the processor, r2 is the last row belonging to 4090 the this processor, and c1-c2 is range of indices of the local part of a 4091 vector suitable for applying the matrix to. This is an mxn matrix. In the 4092 common case of a square matrix, the row and column ranges are the same and 4093 the DIAGONAL part is also square. The remaining portion of the local 4094 submatrix (mxN) constitute the OFF-DIAGONAL portion. 4095 4096 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4097 4098 You can call MatGetInfo() to get information on how effective the preallocation was; 4099 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 4100 You can also run with the option -info and look for messages with the string 4101 malloc in them to see if additional memory allocation was needed. 4102 4103 Example usage: 4104 4105 Consider the following 8x8 matrix with 34 non-zero values, that is 4106 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4107 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4108 as follows: 4109 4110 .vb 4111 1 2 0 | 0 3 0 | 0 4 4112 Proc0 0 5 6 | 7 0 0 | 8 0 4113 9 0 10 | 11 0 0 | 12 0 4114 ------------------------------------- 4115 13 0 14 | 15 16 17 | 0 0 4116 Proc1 0 18 0 | 19 20 21 | 0 0 4117 0 0 0 | 22 23 0 | 24 0 4118 ------------------------------------- 4119 Proc2 25 26 27 | 0 0 28 | 29 0 4120 30 0 0 | 31 32 33 | 0 34 4121 .ve 4122 4123 This can be represented as a collection of submatrices as: 4124 4125 .vb 4126 A B C 4127 D E F 4128 G H I 4129 .ve 4130 4131 Where the submatrices A,B,C are owned by proc0, D,E,F are 4132 owned by proc1, G,H,I are owned by proc2. 4133 4134 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4135 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4136 The 'M','N' parameters are 8,8, and have the same values on all procs. 4137 4138 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4139 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4140 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4141 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4142 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4143 matrix, ans [DF] as another SeqAIJ matrix. 4144 4145 When d_nz, o_nz parameters are specified, d_nz storage elements are 4146 allocated for every row of the local diagonal submatrix, and o_nz 4147 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4148 One way to choose d_nz and o_nz is to use the max nonzerors per local 4149 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4150 In this case, the values of d_nz,o_nz are: 4151 .vb 4152 proc0 : dnz = 2, o_nz = 2 4153 proc1 : dnz = 3, o_nz = 2 4154 proc2 : dnz = 1, o_nz = 4 4155 .ve 4156 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4157 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4158 for proc3. i.e we are using 12+15+10=37 storage locations to store 4159 34 values. 4160 4161 When d_nnz, o_nnz parameters are specified, the storage is specified 4162 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4163 In the above case the values for d_nnz,o_nnz are: 4164 .vb 4165 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4166 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4167 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4168 .ve 4169 Here the space allocated is sum of all the above values i.e 34, and 4170 hence pre-allocation is perfect. 4171 4172 Level: intermediate 4173 4174 .keywords: matrix, aij, compressed row, sparse, parallel 4175 4176 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4177 MPIAIJ, MatGetInfo(), PetscSplitOwnership() 4178 @*/ 4179 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4180 { 4181 PetscErrorCode ierr; 4182 4183 PetscFunctionBegin; 4184 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4185 PetscValidType(B,1); 4186 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4187 PetscFunctionReturn(0); 4188 } 4189 4190 #undef __FUNCT__ 4191 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 4192 /*@ 4193 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 4194 CSR format the local rows. 4195 4196 Collective on MPI_Comm 4197 4198 Input Parameters: 4199 + comm - MPI communicator 4200 . m - number of local rows (Cannot be PETSC_DECIDE) 4201 . n - This value should be the same as the local size used in creating the 4202 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4203 calculated if N is given) For square matrices n is almost always m. 4204 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4205 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4206 . i - row indices 4207 . j - column indices 4208 - a - matrix values 4209 4210 Output Parameter: 4211 . mat - the matrix 4212 4213 Level: intermediate 4214 4215 Notes: 4216 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4217 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4218 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4219 4220 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4221 4222 The format which is used for the sparse matrix input, is equivalent to a 4223 row-major ordering.. i.e for the following matrix, the input data expected is 4224 as shown: 4225 4226 1 0 0 4227 2 0 3 P0 4228 ------- 4229 4 5 6 P1 4230 4231 Process0 [P0]: rows_owned=[0,1] 4232 i = {0,1,3} [size = nrow+1 = 2+1] 4233 j = {0,0,2} [size = nz = 6] 4234 v = {1,2,3} [size = nz = 6] 4235 4236 Process1 [P1]: rows_owned=[2] 4237 i = {0,3} [size = nrow+1 = 1+1] 4238 j = {0,1,2} [size = nz = 6] 4239 v = {4,5,6} [size = nz = 6] 4240 4241 .keywords: matrix, aij, compressed row, sparse, parallel 4242 4243 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4244 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4245 @*/ 4246 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4247 { 4248 PetscErrorCode ierr; 4249 4250 PetscFunctionBegin; 4251 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4252 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4253 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4254 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4255 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4256 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4257 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4258 PetscFunctionReturn(0); 4259 } 4260 4261 #undef __FUNCT__ 4262 #define __FUNCT__ "MatCreateAIJ" 4263 /*@C 4264 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4265 (the default parallel PETSc format). For good matrix assembly performance 4266 the user should preallocate the matrix storage by setting the parameters 4267 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4268 performance can be increased by more than a factor of 50. 4269 4270 Collective on MPI_Comm 4271 4272 Input Parameters: 4273 + comm - MPI communicator 4274 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4275 This value should be the same as the local size used in creating the 4276 y vector for the matrix-vector product y = Ax. 4277 . n - This value should be the same as the local size used in creating the 4278 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4279 calculated if N is given) For square matrices n is almost always m. 4280 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4281 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4282 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4283 (same value is used for all local rows) 4284 . d_nnz - array containing the number of nonzeros in the various rows of the 4285 DIAGONAL portion of the local submatrix (possibly different for each row) 4286 or NULL, if d_nz is used to specify the nonzero structure. 4287 The size of this array is equal to the number of local rows, i.e 'm'. 4288 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4289 submatrix (same value is used for all local rows). 4290 - o_nnz - array containing the number of nonzeros in the various rows of the 4291 OFF-DIAGONAL portion of the local submatrix (possibly different for 4292 each row) or NULL, if o_nz is used to specify the nonzero 4293 structure. The size of this array is equal to the number 4294 of local rows, i.e 'm'. 4295 4296 Output Parameter: 4297 . A - the matrix 4298 4299 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4300 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4301 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4302 4303 Notes: 4304 If the *_nnz parameter is given then the *_nz parameter is ignored 4305 4306 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4307 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4308 storage requirements for this matrix. 4309 4310 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4311 processor than it must be used on all processors that share the object for 4312 that argument. 4313 4314 The user MUST specify either the local or global matrix dimensions 4315 (possibly both). 4316 4317 The parallel matrix is partitioned across processors such that the 4318 first m0 rows belong to process 0, the next m1 rows belong to 4319 process 1, the next m2 rows belong to process 2 etc.. where 4320 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4321 values corresponding to [m x N] submatrix. 4322 4323 The columns are logically partitioned with the n0 columns belonging 4324 to 0th partition, the next n1 columns belonging to the next 4325 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 4326 4327 The DIAGONAL portion of the local submatrix on any given processor 4328 is the submatrix corresponding to the rows and columns m,n 4329 corresponding to the given processor. i.e diagonal matrix on 4330 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4331 etc. The remaining portion of the local submatrix [m x (N-n)] 4332 constitute the OFF-DIAGONAL portion. The example below better 4333 illustrates this concept. 4334 4335 For a square global matrix we define each processor's diagonal portion 4336 to be its local rows and the corresponding columns (a square submatrix); 4337 each processor's off-diagonal portion encompasses the remainder of the 4338 local matrix (a rectangular submatrix). 4339 4340 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4341 4342 When calling this routine with a single process communicator, a matrix of 4343 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4344 type of communicator, use the construction mechanism: 4345 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4346 4347 By default, this format uses inodes (identical nodes) when possible. 4348 We search for consecutive rows with the same nonzero structure, thereby 4349 reusing matrix information to achieve increased efficiency. 4350 4351 Options Database Keys: 4352 + -mat_no_inode - Do not use inodes 4353 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4354 - -mat_aij_oneindex - Internally use indexing starting at 1 4355 rather than 0. Note that when calling MatSetValues(), 4356 the user still MUST index entries starting at 0! 4357 4358 4359 Example usage: 4360 4361 Consider the following 8x8 matrix with 34 non-zero values, that is 4362 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4363 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4364 as follows: 4365 4366 .vb 4367 1 2 0 | 0 3 0 | 0 4 4368 Proc0 0 5 6 | 7 0 0 | 8 0 4369 9 0 10 | 11 0 0 | 12 0 4370 ------------------------------------- 4371 13 0 14 | 15 16 17 | 0 0 4372 Proc1 0 18 0 | 19 20 21 | 0 0 4373 0 0 0 | 22 23 0 | 24 0 4374 ------------------------------------- 4375 Proc2 25 26 27 | 0 0 28 | 29 0 4376 30 0 0 | 31 32 33 | 0 34 4377 .ve 4378 4379 This can be represented as a collection of submatrices as: 4380 4381 .vb 4382 A B C 4383 D E F 4384 G H I 4385 .ve 4386 4387 Where the submatrices A,B,C are owned by proc0, D,E,F are 4388 owned by proc1, G,H,I are owned by proc2. 4389 4390 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4391 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4392 The 'M','N' parameters are 8,8, and have the same values on all procs. 4393 4394 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4395 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4396 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4397 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4398 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4399 matrix, ans [DF] as another SeqAIJ matrix. 4400 4401 When d_nz, o_nz parameters are specified, d_nz storage elements are 4402 allocated for every row of the local diagonal submatrix, and o_nz 4403 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4404 One way to choose d_nz and o_nz is to use the max nonzerors per local 4405 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4406 In this case, the values of d_nz,o_nz are: 4407 .vb 4408 proc0 : dnz = 2, o_nz = 2 4409 proc1 : dnz = 3, o_nz = 2 4410 proc2 : dnz = 1, o_nz = 4 4411 .ve 4412 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4413 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4414 for proc3. i.e we are using 12+15+10=37 storage locations to store 4415 34 values. 4416 4417 When d_nnz, o_nnz parameters are specified, the storage is specified 4418 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4419 In the above case the values for d_nnz,o_nnz are: 4420 .vb 4421 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4422 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4423 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4424 .ve 4425 Here the space allocated is sum of all the above values i.e 34, and 4426 hence pre-allocation is perfect. 4427 4428 Level: intermediate 4429 4430 .keywords: matrix, aij, compressed row, sparse, parallel 4431 4432 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4433 MPIAIJ, MatCreateMPIAIJWithArrays() 4434 @*/ 4435 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4436 { 4437 PetscErrorCode ierr; 4438 PetscMPIInt size; 4439 4440 PetscFunctionBegin; 4441 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4442 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4443 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4444 if (size > 1) { 4445 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4446 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4447 } else { 4448 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4449 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4450 } 4451 PetscFunctionReturn(0); 4452 } 4453 4454 #undef __FUNCT__ 4455 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4456 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4457 { 4458 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4459 4460 PetscFunctionBegin; 4461 *Ad = a->A; 4462 *Ao = a->B; 4463 *colmap = a->garray; 4464 PetscFunctionReturn(0); 4465 } 4466 4467 #undef __FUNCT__ 4468 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4469 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4470 { 4471 PetscErrorCode ierr; 4472 PetscInt i; 4473 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4474 4475 PetscFunctionBegin; 4476 if (coloring->ctype == IS_COLORING_GLOBAL) { 4477 ISColoringValue *allcolors,*colors; 4478 ISColoring ocoloring; 4479 4480 /* set coloring for diagonal portion */ 4481 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4482 4483 /* set coloring for off-diagonal portion */ 4484 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 4485 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4486 for (i=0; i<a->B->cmap->n; i++) { 4487 colors[i] = allcolors[a->garray[i]]; 4488 } 4489 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4490 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4491 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4492 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4493 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4494 ISColoringValue *colors; 4495 PetscInt *larray; 4496 ISColoring ocoloring; 4497 4498 /* set coloring for diagonal portion */ 4499 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4500 for (i=0; i<a->A->cmap->n; i++) { 4501 larray[i] = i + A->cmap->rstart; 4502 } 4503 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 4504 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4505 for (i=0; i<a->A->cmap->n; i++) { 4506 colors[i] = coloring->colors[larray[i]]; 4507 } 4508 ierr = PetscFree(larray);CHKERRQ(ierr); 4509 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4510 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4511 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4512 4513 /* set coloring for off-diagonal portion */ 4514 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4515 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 4516 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4517 for (i=0; i<a->B->cmap->n; i++) { 4518 colors[i] = coloring->colors[larray[i]]; 4519 } 4520 ierr = PetscFree(larray);CHKERRQ(ierr); 4521 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4522 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4523 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4524 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4525 PetscFunctionReturn(0); 4526 } 4527 4528 #undef __FUNCT__ 4529 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4530 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4531 { 4532 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4533 PetscErrorCode ierr; 4534 4535 PetscFunctionBegin; 4536 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4537 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4538 PetscFunctionReturn(0); 4539 } 4540 4541 #undef __FUNCT__ 4542 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4543 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4544 { 4545 PetscErrorCode ierr; 4546 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4547 PetscInt *indx; 4548 4549 PetscFunctionBegin; 4550 /* This routine will ONLY return MPIAIJ type matrix */ 4551 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4552 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4553 if (n == PETSC_DECIDE) { 4554 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4555 } 4556 /* Check sum(n) = N */ 4557 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4558 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4559 4560 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4561 rstart -= m; 4562 4563 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4564 for (i=0; i<m; i++) { 4565 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4566 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4567 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4568 } 4569 4570 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4571 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4572 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4573 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4574 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4575 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4576 PetscFunctionReturn(0); 4577 } 4578 4579 #undef __FUNCT__ 4580 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4581 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4582 { 4583 PetscErrorCode ierr; 4584 PetscInt m,N,i,rstart,nnz,Ii; 4585 PetscInt *indx; 4586 PetscScalar *values; 4587 4588 PetscFunctionBegin; 4589 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4590 ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr); 4591 for (i=0; i<m; i++) { 4592 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4593 Ii = i + rstart; 4594 ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4595 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4596 } 4597 ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4598 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4599 PetscFunctionReturn(0); 4600 } 4601 4602 #undef __FUNCT__ 4603 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4604 /*@ 4605 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4606 matrices from each processor 4607 4608 Collective on MPI_Comm 4609 4610 Input Parameters: 4611 + comm - the communicators the parallel matrix will live on 4612 . inmat - the input sequential matrices 4613 . n - number of local columns (or PETSC_DECIDE) 4614 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4615 4616 Output Parameter: 4617 . outmat - the parallel matrix generated 4618 4619 Level: advanced 4620 4621 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4622 4623 @*/ 4624 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4625 { 4626 PetscErrorCode ierr; 4627 4628 PetscFunctionBegin; 4629 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4630 if (scall == MAT_INITIAL_MATRIX) { 4631 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4632 } 4633 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4634 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4635 PetscFunctionReturn(0); 4636 } 4637 4638 #undef __FUNCT__ 4639 #define __FUNCT__ "MatFileSplit" 4640 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4641 { 4642 PetscErrorCode ierr; 4643 PetscMPIInt rank; 4644 PetscInt m,N,i,rstart,nnz; 4645 size_t len; 4646 const PetscInt *indx; 4647 PetscViewer out; 4648 char *name; 4649 Mat B; 4650 const PetscScalar *values; 4651 4652 PetscFunctionBegin; 4653 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4654 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4655 /* Should this be the type of the diagonal block of A? */ 4656 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4657 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4658 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 4659 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4660 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4661 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4662 for (i=0; i<m; i++) { 4663 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4664 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4665 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4666 } 4667 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4668 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4669 4670 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4671 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4672 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4673 sprintf(name,"%s.%d",outfile,rank); 4674 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4675 ierr = PetscFree(name);CHKERRQ(ierr); 4676 ierr = MatView(B,out);CHKERRQ(ierr); 4677 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4678 ierr = MatDestroy(&B);CHKERRQ(ierr); 4679 PetscFunctionReturn(0); 4680 } 4681 4682 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4683 #undef __FUNCT__ 4684 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4685 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4686 { 4687 PetscErrorCode ierr; 4688 Mat_Merge_SeqsToMPI *merge; 4689 PetscContainer container; 4690 4691 PetscFunctionBegin; 4692 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4693 if (container) { 4694 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4695 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4696 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4697 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4698 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4699 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4700 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4701 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4702 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4703 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4704 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4705 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4706 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4707 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4708 ierr = PetscFree(merge);CHKERRQ(ierr); 4709 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4710 } 4711 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4712 PetscFunctionReturn(0); 4713 } 4714 4715 #include <../src/mat/utils/freespace.h> 4716 #include <petscbt.h> 4717 4718 #undef __FUNCT__ 4719 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4720 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4721 { 4722 PetscErrorCode ierr; 4723 MPI_Comm comm; 4724 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4725 PetscMPIInt size,rank,taga,*len_s; 4726 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4727 PetscInt proc,m; 4728 PetscInt **buf_ri,**buf_rj; 4729 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4730 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4731 MPI_Request *s_waits,*r_waits; 4732 MPI_Status *status; 4733 MatScalar *aa=a->a; 4734 MatScalar **abuf_r,*ba_i; 4735 Mat_Merge_SeqsToMPI *merge; 4736 PetscContainer container; 4737 4738 PetscFunctionBegin; 4739 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4740 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4741 4742 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4743 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4744 4745 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4746 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4747 4748 bi = merge->bi; 4749 bj = merge->bj; 4750 buf_ri = merge->buf_ri; 4751 buf_rj = merge->buf_rj; 4752 4753 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4754 owners = merge->rowmap->range; 4755 len_s = merge->len_s; 4756 4757 /* send and recv matrix values */ 4758 /*-----------------------------*/ 4759 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4760 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4761 4762 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4763 for (proc=0,k=0; proc<size; proc++) { 4764 if (!len_s[proc]) continue; 4765 i = owners[proc]; 4766 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4767 k++; 4768 } 4769 4770 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4771 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4772 ierr = PetscFree(status);CHKERRQ(ierr); 4773 4774 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4775 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4776 4777 /* insert mat values of mpimat */ 4778 /*----------------------------*/ 4779 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4780 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4781 4782 for (k=0; k<merge->nrecv; k++) { 4783 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4784 nrows = *(buf_ri_k[k]); 4785 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4786 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4787 } 4788 4789 /* set values of ba */ 4790 m = merge->rowmap->n; 4791 for (i=0; i<m; i++) { 4792 arow = owners[rank] + i; 4793 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4794 bnzi = bi[i+1] - bi[i]; 4795 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4796 4797 /* add local non-zero vals of this proc's seqmat into ba */ 4798 anzi = ai[arow+1] - ai[arow]; 4799 aj = a->j + ai[arow]; 4800 aa = a->a + ai[arow]; 4801 nextaj = 0; 4802 for (j=0; nextaj<anzi; j++) { 4803 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4804 ba_i[j] += aa[nextaj++]; 4805 } 4806 } 4807 4808 /* add received vals into ba */ 4809 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4810 /* i-th row */ 4811 if (i == *nextrow[k]) { 4812 anzi = *(nextai[k]+1) - *nextai[k]; 4813 aj = buf_rj[k] + *(nextai[k]); 4814 aa = abuf_r[k] + *(nextai[k]); 4815 nextaj = 0; 4816 for (j=0; nextaj<anzi; j++) { 4817 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4818 ba_i[j] += aa[nextaj++]; 4819 } 4820 } 4821 nextrow[k]++; nextai[k]++; 4822 } 4823 } 4824 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4825 } 4826 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4827 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4828 4829 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4830 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4831 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4832 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4833 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4834 PetscFunctionReturn(0); 4835 } 4836 4837 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4838 4839 #undef __FUNCT__ 4840 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4841 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4842 { 4843 PetscErrorCode ierr; 4844 Mat B_mpi; 4845 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4846 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4847 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4848 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4849 PetscInt len,proc,*dnz,*onz,bs,cbs; 4850 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4851 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4852 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4853 MPI_Status *status; 4854 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4855 PetscBT lnkbt; 4856 Mat_Merge_SeqsToMPI *merge; 4857 PetscContainer container; 4858 4859 PetscFunctionBegin; 4860 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4861 4862 /* make sure it is a PETSc comm */ 4863 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4864 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4865 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4866 4867 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4868 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4869 4870 /* determine row ownership */ 4871 /*---------------------------------------------------------*/ 4872 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4873 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4874 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4875 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4876 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4877 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4878 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4879 4880 m = merge->rowmap->n; 4881 owners = merge->rowmap->range; 4882 4883 /* determine the number of messages to send, their lengths */ 4884 /*---------------------------------------------------------*/ 4885 len_s = merge->len_s; 4886 4887 len = 0; /* length of buf_si[] */ 4888 merge->nsend = 0; 4889 for (proc=0; proc<size; proc++) { 4890 len_si[proc] = 0; 4891 if (proc == rank) { 4892 len_s[proc] = 0; 4893 } else { 4894 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4895 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4896 } 4897 if (len_s[proc]) { 4898 merge->nsend++; 4899 nrows = 0; 4900 for (i=owners[proc]; i<owners[proc+1]; i++) { 4901 if (ai[i+1] > ai[i]) nrows++; 4902 } 4903 len_si[proc] = 2*(nrows+1); 4904 len += len_si[proc]; 4905 } 4906 } 4907 4908 /* determine the number and length of messages to receive for ij-structure */ 4909 /*-------------------------------------------------------------------------*/ 4910 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4911 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4912 4913 /* post the Irecv of j-structure */ 4914 /*-------------------------------*/ 4915 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4916 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4917 4918 /* post the Isend of j-structure */ 4919 /*--------------------------------*/ 4920 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4921 4922 for (proc=0, k=0; proc<size; proc++) { 4923 if (!len_s[proc]) continue; 4924 i = owners[proc]; 4925 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4926 k++; 4927 } 4928 4929 /* receives and sends of j-structure are complete */ 4930 /*------------------------------------------------*/ 4931 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4932 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4933 4934 /* send and recv i-structure */ 4935 /*---------------------------*/ 4936 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4937 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4938 4939 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4940 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4941 for (proc=0,k=0; proc<size; proc++) { 4942 if (!len_s[proc]) continue; 4943 /* form outgoing message for i-structure: 4944 buf_si[0]: nrows to be sent 4945 [1:nrows]: row index (global) 4946 [nrows+1:2*nrows+1]: i-structure index 4947 */ 4948 /*-------------------------------------------*/ 4949 nrows = len_si[proc]/2 - 1; 4950 buf_si_i = buf_si + nrows+1; 4951 buf_si[0] = nrows; 4952 buf_si_i[0] = 0; 4953 nrows = 0; 4954 for (i=owners[proc]; i<owners[proc+1]; i++) { 4955 anzi = ai[i+1] - ai[i]; 4956 if (anzi) { 4957 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4958 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4959 nrows++; 4960 } 4961 } 4962 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4963 k++; 4964 buf_si += len_si[proc]; 4965 } 4966 4967 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4968 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4969 4970 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4971 for (i=0; i<merge->nrecv; i++) { 4972 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4973 } 4974 4975 ierr = PetscFree(len_si);CHKERRQ(ierr); 4976 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4977 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4978 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4979 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4980 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4981 ierr = PetscFree(status);CHKERRQ(ierr); 4982 4983 /* compute a local seq matrix in each processor */ 4984 /*----------------------------------------------*/ 4985 /* allocate bi array and free space for accumulating nonzero column info */ 4986 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4987 bi[0] = 0; 4988 4989 /* create and initialize a linked list */ 4990 nlnk = N+1; 4991 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4992 4993 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4994 len = ai[owners[rank+1]] - ai[owners[rank]]; 4995 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4996 4997 current_space = free_space; 4998 4999 /* determine symbolic info for each local row */ 5000 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 5001 5002 for (k=0; k<merge->nrecv; k++) { 5003 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 5004 nrows = *buf_ri_k[k]; 5005 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 5006 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 5007 } 5008 5009 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 5010 len = 0; 5011 for (i=0; i<m; i++) { 5012 bnzi = 0; 5013 /* add local non-zero cols of this proc's seqmat into lnk */ 5014 arow = owners[rank] + i; 5015 anzi = ai[arow+1] - ai[arow]; 5016 aj = a->j + ai[arow]; 5017 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 5018 bnzi += nlnk; 5019 /* add received col data into lnk */ 5020 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 5021 if (i == *nextrow[k]) { /* i-th row */ 5022 anzi = *(nextai[k]+1) - *nextai[k]; 5023 aj = buf_rj[k] + *nextai[k]; 5024 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 5025 bnzi += nlnk; 5026 nextrow[k]++; nextai[k]++; 5027 } 5028 } 5029 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 5030 5031 /* if free space is not available, make more free space */ 5032 if (current_space->local_remaining<bnzi) { 5033 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 5034 nspacedouble++; 5035 } 5036 /* copy data into free space, then initialize lnk */ 5037 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 5038 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 5039 5040 current_space->array += bnzi; 5041 current_space->local_used += bnzi; 5042 current_space->local_remaining -= bnzi; 5043 5044 bi[i+1] = bi[i] + bnzi; 5045 } 5046 5047 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 5048 5049 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 5050 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 5051 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 5052 5053 /* create symbolic parallel matrix B_mpi */ 5054 /*---------------------------------------*/ 5055 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 5056 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 5057 if (n==PETSC_DECIDE) { 5058 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 5059 } else { 5060 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5061 } 5062 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 5063 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 5064 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 5065 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 5066 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5067 5068 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5069 B_mpi->assembled = PETSC_FALSE; 5070 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 5071 merge->bi = bi; 5072 merge->bj = bj; 5073 merge->buf_ri = buf_ri; 5074 merge->buf_rj = buf_rj; 5075 merge->coi = NULL; 5076 merge->coj = NULL; 5077 merge->owners_co = NULL; 5078 5079 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 5080 5081 /* attach the supporting struct to B_mpi for reuse */ 5082 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 5083 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 5084 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 5085 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 5086 *mpimat = B_mpi; 5087 5088 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 5089 PetscFunctionReturn(0); 5090 } 5091 5092 #undef __FUNCT__ 5093 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 5094 /*@C 5095 MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential 5096 matrices from each processor 5097 5098 Collective on MPI_Comm 5099 5100 Input Parameters: 5101 + comm - the communicators the parallel matrix will live on 5102 . seqmat - the input sequential matrices 5103 . m - number of local rows (or PETSC_DECIDE) 5104 . n - number of local columns (or PETSC_DECIDE) 5105 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5106 5107 Output Parameter: 5108 . mpimat - the parallel matrix generated 5109 5110 Level: advanced 5111 5112 Notes: 5113 The dimensions of the sequential matrix in each processor MUST be the same. 5114 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5115 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 5116 @*/ 5117 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 5118 { 5119 PetscErrorCode ierr; 5120 PetscMPIInt size; 5121 5122 PetscFunctionBegin; 5123 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5124 if (size == 1) { 5125 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5126 if (scall == MAT_INITIAL_MATRIX) { 5127 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 5128 } else { 5129 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5130 } 5131 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5132 PetscFunctionReturn(0); 5133 } 5134 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5135 if (scall == MAT_INITIAL_MATRIX) { 5136 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 5137 } 5138 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 5139 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5140 PetscFunctionReturn(0); 5141 } 5142 5143 #undef __FUNCT__ 5144 #define __FUNCT__ "MatMPIAIJGetLocalMat" 5145 /*@ 5146 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with 5147 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5148 with MatGetSize() 5149 5150 Not Collective 5151 5152 Input Parameters: 5153 + A - the matrix 5154 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5155 5156 Output Parameter: 5157 . A_loc - the local sequential matrix generated 5158 5159 Level: developer 5160 5161 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 5162 5163 @*/ 5164 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5165 { 5166 PetscErrorCode ierr; 5167 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5168 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 5169 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 5170 MatScalar *aa=a->a,*ba=b->a,*cam; 5171 PetscScalar *ca; 5172 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5173 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5174 PetscBool match; 5175 5176 PetscFunctionBegin; 5177 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5178 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5179 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5180 if (scall == MAT_INITIAL_MATRIX) { 5181 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 5182 ci[0] = 0; 5183 for (i=0; i<am; i++) { 5184 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5185 } 5186 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 5187 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 5188 k = 0; 5189 for (i=0; i<am; i++) { 5190 ncols_o = bi[i+1] - bi[i]; 5191 ncols_d = ai[i+1] - ai[i]; 5192 /* off-diagonal portion of A */ 5193 for (jo=0; jo<ncols_o; jo++) { 5194 col = cmap[*bj]; 5195 if (col >= cstart) break; 5196 cj[k] = col; bj++; 5197 ca[k++] = *ba++; 5198 } 5199 /* diagonal portion of A */ 5200 for (j=0; j<ncols_d; j++) { 5201 cj[k] = cstart + *aj++; 5202 ca[k++] = *aa++; 5203 } 5204 /* off-diagonal portion of A */ 5205 for (j=jo; j<ncols_o; j++) { 5206 cj[k] = cmap[*bj++]; 5207 ca[k++] = *ba++; 5208 } 5209 } 5210 /* put together the new matrix */ 5211 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5212 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5213 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5214 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5215 mat->free_a = PETSC_TRUE; 5216 mat->free_ij = PETSC_TRUE; 5217 mat->nonew = 0; 5218 } else if (scall == MAT_REUSE_MATRIX) { 5219 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5220 ci = mat->i; cj = mat->j; cam = mat->a; 5221 for (i=0; i<am; i++) { 5222 /* off-diagonal portion of A */ 5223 ncols_o = bi[i+1] - bi[i]; 5224 for (jo=0; jo<ncols_o; jo++) { 5225 col = cmap[*bj]; 5226 if (col >= cstart) break; 5227 *cam++ = *ba++; bj++; 5228 } 5229 /* diagonal portion of A */ 5230 ncols_d = ai[i+1] - ai[i]; 5231 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5232 /* off-diagonal portion of A */ 5233 for (j=jo; j<ncols_o; j++) { 5234 *cam++ = *ba++; bj++; 5235 } 5236 } 5237 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5238 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5239 PetscFunctionReturn(0); 5240 } 5241 5242 #undef __FUNCT__ 5243 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5244 /*@C 5245 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5246 5247 Not Collective 5248 5249 Input Parameters: 5250 + A - the matrix 5251 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5252 - row, col - index sets of rows and columns to extract (or NULL) 5253 5254 Output Parameter: 5255 . A_loc - the local sequential matrix generated 5256 5257 Level: developer 5258 5259 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5260 5261 @*/ 5262 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5263 { 5264 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5265 PetscErrorCode ierr; 5266 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5267 IS isrowa,iscola; 5268 Mat *aloc; 5269 PetscBool match; 5270 5271 PetscFunctionBegin; 5272 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5273 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5274 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5275 if (!row) { 5276 start = A->rmap->rstart; end = A->rmap->rend; 5277 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5278 } else { 5279 isrowa = *row; 5280 } 5281 if (!col) { 5282 start = A->cmap->rstart; 5283 cmap = a->garray; 5284 nzA = a->A->cmap->n; 5285 nzB = a->B->cmap->n; 5286 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5287 ncols = 0; 5288 for (i=0; i<nzB; i++) { 5289 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5290 else break; 5291 } 5292 imark = i; 5293 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5294 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5295 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5296 } else { 5297 iscola = *col; 5298 } 5299 if (scall != MAT_INITIAL_MATRIX) { 5300 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5301 aloc[0] = *A_loc; 5302 } 5303 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5304 *A_loc = aloc[0]; 5305 ierr = PetscFree(aloc);CHKERRQ(ierr); 5306 if (!row) { 5307 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5308 } 5309 if (!col) { 5310 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5311 } 5312 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5313 PetscFunctionReturn(0); 5314 } 5315 5316 #undef __FUNCT__ 5317 #define __FUNCT__ "MatGetBrowsOfAcols" 5318 /*@C 5319 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5320 5321 Collective on Mat 5322 5323 Input Parameters: 5324 + A,B - the matrices in mpiaij format 5325 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5326 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5327 5328 Output Parameter: 5329 + rowb, colb - index sets of rows and columns of B to extract 5330 - B_seq - the sequential matrix generated 5331 5332 Level: developer 5333 5334 @*/ 5335 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5336 { 5337 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5338 PetscErrorCode ierr; 5339 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5340 IS isrowb,iscolb; 5341 Mat *bseq=NULL; 5342 5343 PetscFunctionBegin; 5344 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5345 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5346 } 5347 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5348 5349 if (scall == MAT_INITIAL_MATRIX) { 5350 start = A->cmap->rstart; 5351 cmap = a->garray; 5352 nzA = a->A->cmap->n; 5353 nzB = a->B->cmap->n; 5354 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5355 ncols = 0; 5356 for (i=0; i<nzB; i++) { /* row < local row index */ 5357 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5358 else break; 5359 } 5360 imark = i; 5361 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5362 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5363 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5364 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5365 } else { 5366 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5367 isrowb = *rowb; iscolb = *colb; 5368 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5369 bseq[0] = *B_seq; 5370 } 5371 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5372 *B_seq = bseq[0]; 5373 ierr = PetscFree(bseq);CHKERRQ(ierr); 5374 if (!rowb) { 5375 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5376 } else { 5377 *rowb = isrowb; 5378 } 5379 if (!colb) { 5380 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5381 } else { 5382 *colb = iscolb; 5383 } 5384 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5385 PetscFunctionReturn(0); 5386 } 5387 5388 #undef __FUNCT__ 5389 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5390 /* 5391 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5392 of the OFF-DIAGONAL portion of local A 5393 5394 Collective on Mat 5395 5396 Input Parameters: 5397 + A,B - the matrices in mpiaij format 5398 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5399 5400 Output Parameter: 5401 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5402 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5403 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5404 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5405 5406 Level: developer 5407 5408 */ 5409 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5410 { 5411 VecScatter_MPI_General *gen_to,*gen_from; 5412 PetscErrorCode ierr; 5413 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5414 Mat_SeqAIJ *b_oth; 5415 VecScatter ctx =a->Mvctx; 5416 MPI_Comm comm; 5417 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5418 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5419 PetscScalar *rvalues,*svalues; 5420 MatScalar *b_otha,*bufa,*bufA; 5421 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5422 MPI_Request *rwaits = NULL,*swaits = NULL; 5423 MPI_Status *sstatus,rstatus; 5424 PetscMPIInt jj; 5425 PetscInt *cols,sbs,rbs; 5426 PetscScalar *vals; 5427 5428 PetscFunctionBegin; 5429 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5430 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5431 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5432 } 5433 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5434 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5435 5436 gen_to = (VecScatter_MPI_General*)ctx->todata; 5437 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5438 rvalues = gen_from->values; /* holds the length of receiving row */ 5439 svalues = gen_to->values; /* holds the length of sending row */ 5440 nrecvs = gen_from->n; 5441 nsends = gen_to->n; 5442 5443 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5444 srow = gen_to->indices; /* local row index to be sent */ 5445 sstarts = gen_to->starts; 5446 sprocs = gen_to->procs; 5447 sstatus = gen_to->sstatus; 5448 sbs = gen_to->bs; 5449 rstarts = gen_from->starts; 5450 rprocs = gen_from->procs; 5451 rbs = gen_from->bs; 5452 5453 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5454 if (scall == MAT_INITIAL_MATRIX) { 5455 /* i-array */ 5456 /*---------*/ 5457 /* post receives */ 5458 for (i=0; i<nrecvs; i++) { 5459 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5460 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5461 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5462 } 5463 5464 /* pack the outgoing message */ 5465 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5466 5467 sstartsj[0] = 0; 5468 rstartsj[0] = 0; 5469 len = 0; /* total length of j or a array to be sent */ 5470 k = 0; 5471 for (i=0; i<nsends; i++) { 5472 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5473 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5474 for (j=0; j<nrows; j++) { 5475 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5476 for (l=0; l<sbs; l++) { 5477 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5478 5479 rowlen[j*sbs+l] = ncols; 5480 5481 len += ncols; 5482 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5483 } 5484 k++; 5485 } 5486 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5487 5488 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5489 } 5490 /* recvs and sends of i-array are completed */ 5491 i = nrecvs; 5492 while (i--) { 5493 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5494 } 5495 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5496 5497 /* allocate buffers for sending j and a arrays */ 5498 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5499 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5500 5501 /* create i-array of B_oth */ 5502 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5503 5504 b_othi[0] = 0; 5505 len = 0; /* total length of j or a array to be received */ 5506 k = 0; 5507 for (i=0; i<nrecvs; i++) { 5508 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5509 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5510 for (j=0; j<nrows; j++) { 5511 b_othi[k+1] = b_othi[k] + rowlen[j]; 5512 len += rowlen[j]; k++; 5513 } 5514 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5515 } 5516 5517 /* allocate space for j and a arrrays of B_oth */ 5518 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5519 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5520 5521 /* j-array */ 5522 /*---------*/ 5523 /* post receives of j-array */ 5524 for (i=0; i<nrecvs; i++) { 5525 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5526 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5527 } 5528 5529 /* pack the outgoing message j-array */ 5530 k = 0; 5531 for (i=0; i<nsends; i++) { 5532 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5533 bufJ = bufj+sstartsj[i]; 5534 for (j=0; j<nrows; j++) { 5535 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5536 for (ll=0; ll<sbs; ll++) { 5537 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5538 for (l=0; l<ncols; l++) { 5539 *bufJ++ = cols[l]; 5540 } 5541 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5542 } 5543 } 5544 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5545 } 5546 5547 /* recvs and sends of j-array are completed */ 5548 i = nrecvs; 5549 while (i--) { 5550 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5551 } 5552 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5553 } else if (scall == MAT_REUSE_MATRIX) { 5554 sstartsj = *startsj_s; 5555 rstartsj = *startsj_r; 5556 bufa = *bufa_ptr; 5557 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5558 b_otha = b_oth->a; 5559 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5560 5561 /* a-array */ 5562 /*---------*/ 5563 /* post receives of a-array */ 5564 for (i=0; i<nrecvs; i++) { 5565 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5566 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5567 } 5568 5569 /* pack the outgoing message a-array */ 5570 k = 0; 5571 for (i=0; i<nsends; i++) { 5572 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5573 bufA = bufa+sstartsj[i]; 5574 for (j=0; j<nrows; j++) { 5575 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5576 for (ll=0; ll<sbs; ll++) { 5577 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5578 for (l=0; l<ncols; l++) { 5579 *bufA++ = vals[l]; 5580 } 5581 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5582 } 5583 } 5584 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5585 } 5586 /* recvs and sends of a-array are completed */ 5587 i = nrecvs; 5588 while (i--) { 5589 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5590 } 5591 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5592 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5593 5594 if (scall == MAT_INITIAL_MATRIX) { 5595 /* put together the new matrix */ 5596 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5597 5598 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5599 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5600 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5601 b_oth->free_a = PETSC_TRUE; 5602 b_oth->free_ij = PETSC_TRUE; 5603 b_oth->nonew = 0; 5604 5605 ierr = PetscFree(bufj);CHKERRQ(ierr); 5606 if (!startsj_s || !bufa_ptr) { 5607 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5608 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5609 } else { 5610 *startsj_s = sstartsj; 5611 *startsj_r = rstartsj; 5612 *bufa_ptr = bufa; 5613 } 5614 } 5615 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5616 PetscFunctionReturn(0); 5617 } 5618 5619 #undef __FUNCT__ 5620 #define __FUNCT__ "MatGetCommunicationStructs" 5621 /*@C 5622 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5623 5624 Not Collective 5625 5626 Input Parameters: 5627 . A - The matrix in mpiaij format 5628 5629 Output Parameter: 5630 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5631 . colmap - A map from global column index to local index into lvec 5632 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5633 5634 Level: developer 5635 5636 @*/ 5637 #if defined(PETSC_USE_CTABLE) 5638 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5639 #else 5640 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5641 #endif 5642 { 5643 Mat_MPIAIJ *a; 5644 5645 PetscFunctionBegin; 5646 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5647 PetscValidPointer(lvec, 2); 5648 PetscValidPointer(colmap, 3); 5649 PetscValidPointer(multScatter, 4); 5650 a = (Mat_MPIAIJ*) A->data; 5651 if (lvec) *lvec = a->lvec; 5652 if (colmap) *colmap = a->colmap; 5653 if (multScatter) *multScatter = a->Mvctx; 5654 PetscFunctionReturn(0); 5655 } 5656 5657 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5658 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5659 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5660 5661 #undef __FUNCT__ 5662 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5663 /* 5664 Computes (B'*A')' since computing B*A directly is untenable 5665 5666 n p p 5667 ( ) ( ) ( ) 5668 m ( A ) * n ( B ) = m ( C ) 5669 ( ) ( ) ( ) 5670 5671 */ 5672 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5673 { 5674 PetscErrorCode ierr; 5675 Mat At,Bt,Ct; 5676 5677 PetscFunctionBegin; 5678 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5679 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5680 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5681 ierr = MatDestroy(&At);CHKERRQ(ierr); 5682 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5683 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5684 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5685 PetscFunctionReturn(0); 5686 } 5687 5688 #undef __FUNCT__ 5689 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5690 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5691 { 5692 PetscErrorCode ierr; 5693 PetscInt m=A->rmap->n,n=B->cmap->n; 5694 Mat Cmat; 5695 5696 PetscFunctionBegin; 5697 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5698 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5699 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5700 ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 5701 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5702 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5703 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5704 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5705 5706 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5707 5708 *C = Cmat; 5709 PetscFunctionReturn(0); 5710 } 5711 5712 /* ----------------------------------------------------------------*/ 5713 #undef __FUNCT__ 5714 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5715 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5716 { 5717 PetscErrorCode ierr; 5718 5719 PetscFunctionBegin; 5720 if (scall == MAT_INITIAL_MATRIX) { 5721 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5722 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5723 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5724 } 5725 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5726 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5727 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5728 PetscFunctionReturn(0); 5729 } 5730 5731 #if defined(PETSC_HAVE_MUMPS) 5732 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5733 #endif 5734 #if defined(PETSC_HAVE_PASTIX) 5735 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5736 #endif 5737 #if defined(PETSC_HAVE_SUPERLU_DIST) 5738 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5739 #endif 5740 #if defined(PETSC_HAVE_CLIQUE) 5741 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5742 #endif 5743 5744 /*MC 5745 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5746 5747 Options Database Keys: 5748 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5749 5750 Level: beginner 5751 5752 .seealso: MatCreateAIJ() 5753 M*/ 5754 5755 #undef __FUNCT__ 5756 #define __FUNCT__ "MatCreate_MPIAIJ" 5757 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5758 { 5759 Mat_MPIAIJ *b; 5760 PetscErrorCode ierr; 5761 PetscMPIInt size; 5762 5763 PetscFunctionBegin; 5764 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5765 5766 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5767 B->data = (void*)b; 5768 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5769 B->assembled = PETSC_FALSE; 5770 B->insertmode = NOT_SET_VALUES; 5771 b->size = size; 5772 5773 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5774 5775 /* build cache for off array entries formed */ 5776 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5777 5778 b->donotstash = PETSC_FALSE; 5779 b->colmap = 0; 5780 b->garray = 0; 5781 b->roworiented = PETSC_TRUE; 5782 5783 /* stuff used for matrix vector multiply */ 5784 b->lvec = NULL; 5785 b->Mvctx = NULL; 5786 5787 /* stuff for MatGetRow() */ 5788 b->rowindices = 0; 5789 b->rowvalues = 0; 5790 b->getrowactive = PETSC_FALSE; 5791 5792 /* flexible pointer used in CUSP/CUSPARSE classes */ 5793 b->spptr = NULL; 5794 5795 #if defined(PETSC_HAVE_MUMPS) 5796 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr); 5797 #endif 5798 #if defined(PETSC_HAVE_PASTIX) 5799 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5800 #endif 5801 #if defined(PETSC_HAVE_SUPERLU_DIST) 5802 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5803 #endif 5804 #if defined(PETSC_HAVE_CLIQUE) 5805 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr); 5806 #endif 5807 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5808 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5809 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5810 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5811 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5812 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5813 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5814 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5815 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5816 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5817 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5818 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5819 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5820 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5821 PetscFunctionReturn(0); 5822 } 5823 5824 #undef __FUNCT__ 5825 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5826 /*@ 5827 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5828 and "off-diagonal" part of the matrix in CSR format. 5829 5830 Collective on MPI_Comm 5831 5832 Input Parameters: 5833 + comm - MPI communicator 5834 . m - number of local rows (Cannot be PETSC_DECIDE) 5835 . n - This value should be the same as the local size used in creating the 5836 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5837 calculated if N is given) For square matrices n is almost always m. 5838 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5839 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5840 . i - row indices for "diagonal" portion of matrix 5841 . j - column indices 5842 . a - matrix values 5843 . oi - row indices for "off-diagonal" portion of matrix 5844 . oj - column indices 5845 - oa - matrix values 5846 5847 Output Parameter: 5848 . mat - the matrix 5849 5850 Level: advanced 5851 5852 Notes: 5853 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5854 must free the arrays once the matrix has been destroyed and not before. 5855 5856 The i and j indices are 0 based 5857 5858 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5859 5860 This sets local rows and cannot be used to set off-processor values. 5861 5862 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5863 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5864 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5865 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5866 keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5867 communication if it is known that only local entries will be set. 5868 5869 .keywords: matrix, aij, compressed row, sparse, parallel 5870 5871 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5872 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5873 @*/ 5874 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5875 { 5876 PetscErrorCode ierr; 5877 Mat_MPIAIJ *maij; 5878 5879 PetscFunctionBegin; 5880 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5881 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5882 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5883 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5884 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5885 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5886 maij = (Mat_MPIAIJ*) (*mat)->data; 5887 5888 (*mat)->preallocated = PETSC_TRUE; 5889 5890 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5891 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5892 5893 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5894 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5895 5896 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5897 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5898 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5899 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5900 5901 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5902 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5903 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5904 PetscFunctionReturn(0); 5905 } 5906 5907 /* 5908 Special version for direct calls from Fortran 5909 */ 5910 #include <petsc-private/fortranimpl.h> 5911 5912 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5913 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5914 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5915 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5916 #endif 5917 5918 /* Change these macros so can be used in void function */ 5919 #undef CHKERRQ 5920 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5921 #undef SETERRQ2 5922 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5923 #undef SETERRQ3 5924 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5925 #undef SETERRQ 5926 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5927 5928 #undef __FUNCT__ 5929 #define __FUNCT__ "matsetvaluesmpiaij_" 5930 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5931 { 5932 Mat mat = *mmat; 5933 PetscInt m = *mm, n = *mn; 5934 InsertMode addv = *maddv; 5935 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5936 PetscScalar value; 5937 PetscErrorCode ierr; 5938 5939 MatCheckPreallocated(mat,1); 5940 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5941 5942 #if defined(PETSC_USE_DEBUG) 5943 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5944 #endif 5945 { 5946 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5947 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5948 PetscBool roworiented = aij->roworiented; 5949 5950 /* Some Variables required in the macro */ 5951 Mat A = aij->A; 5952 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5953 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5954 MatScalar *aa = a->a; 5955 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5956 Mat B = aij->B; 5957 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5958 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5959 MatScalar *ba = b->a; 5960 5961 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5962 PetscInt nonew = a->nonew; 5963 MatScalar *ap1,*ap2; 5964 5965 PetscFunctionBegin; 5966 for (i=0; i<m; i++) { 5967 if (im[i] < 0) continue; 5968 #if defined(PETSC_USE_DEBUG) 5969 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5970 #endif 5971 if (im[i] >= rstart && im[i] < rend) { 5972 row = im[i] - rstart; 5973 lastcol1 = -1; 5974 rp1 = aj + ai[row]; 5975 ap1 = aa + ai[row]; 5976 rmax1 = aimax[row]; 5977 nrow1 = ailen[row]; 5978 low1 = 0; 5979 high1 = nrow1; 5980 lastcol2 = -1; 5981 rp2 = bj + bi[row]; 5982 ap2 = ba + bi[row]; 5983 rmax2 = bimax[row]; 5984 nrow2 = bilen[row]; 5985 low2 = 0; 5986 high2 = nrow2; 5987 5988 for (j=0; j<n; j++) { 5989 if (roworiented) value = v[i*n+j]; 5990 else value = v[i+j*m]; 5991 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5992 if (in[j] >= cstart && in[j] < cend) { 5993 col = in[j] - cstart; 5994 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5995 } else if (in[j] < 0) continue; 5996 #if defined(PETSC_USE_DEBUG) 5997 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5998 #endif 5999 else { 6000 if (mat->was_assembled) { 6001 if (!aij->colmap) { 6002 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 6003 } 6004 #if defined(PETSC_USE_CTABLE) 6005 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 6006 col--; 6007 #else 6008 col = aij->colmap[in[j]] - 1; 6009 #endif 6010 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 6011 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 6012 col = in[j]; 6013 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 6014 B = aij->B; 6015 b = (Mat_SeqAIJ*)B->data; 6016 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 6017 rp2 = bj + bi[row]; 6018 ap2 = ba + bi[row]; 6019 rmax2 = bimax[row]; 6020 nrow2 = bilen[row]; 6021 low2 = 0; 6022 high2 = nrow2; 6023 bm = aij->B->rmap->n; 6024 ba = b->a; 6025 } 6026 } else col = in[j]; 6027 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 6028 } 6029 } 6030 } else if (!aij->donotstash) { 6031 if (roworiented) { 6032 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6033 } else { 6034 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6035 } 6036 } 6037 } 6038 } 6039 PetscFunctionReturnVoid(); 6040 } 6041 6042