1 2 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 3 #include <petsc-private/vecimpl.h> 4 #include <petscblaslapack.h> 5 #include <petscsf.h> 6 7 /*MC 8 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 9 10 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 11 and MATMPIAIJ otherwise. As a result, for single process communicators, 12 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 13 for communicators controlling multiple processes. It is recommended that you call both of 14 the above preallocation routines for simplicity. 15 16 Options Database Keys: 17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 18 19 Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when 20 enough exist. 21 22 Level: beginner 23 24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ 25 M*/ 26 27 /*MC 28 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 29 30 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 31 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 32 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 33 for communicators controlling multiple processes. It is recommended that you call both of 34 the above preallocation routines for simplicity. 35 36 Options Database Keys: 37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 38 39 Level: beginner 40 41 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 42 M*/ 43 44 #undef __FUNCT__ 45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ" 46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 47 { 48 PetscErrorCode ierr; 49 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 50 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 51 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 52 const PetscInt *ia,*ib; 53 const MatScalar *aa,*bb; 54 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 55 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 56 57 PetscFunctionBegin; 58 *keptrows = 0; 59 ia = a->i; 60 ib = b->i; 61 for (i=0; i<m; i++) { 62 na = ia[i+1] - ia[i]; 63 nb = ib[i+1] - ib[i]; 64 if (!na && !nb) { 65 cnt++; 66 goto ok1; 67 } 68 aa = a->a + ia[i]; 69 for (j=0; j<na; j++) { 70 if (aa[j] != 0.0) goto ok1; 71 } 72 bb = b->a + ib[i]; 73 for (j=0; j <nb; j++) { 74 if (bb[j] != 0.0) goto ok1; 75 } 76 cnt++; 77 ok1:; 78 } 79 ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 80 if (!n0rows) PetscFunctionReturn(0); 81 ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 82 cnt = 0; 83 for (i=0; i<m; i++) { 84 na = ia[i+1] - ia[i]; 85 nb = ib[i+1] - ib[i]; 86 if (!na && !nb) continue; 87 aa = a->a + ia[i]; 88 for (j=0; j<na;j++) { 89 if (aa[j] != 0.0) { 90 rows[cnt++] = rstart + i; 91 goto ok2; 92 } 93 } 94 bb = b->a + ib[i]; 95 for (j=0; j<nb; j++) { 96 if (bb[j] != 0.0) { 97 rows[cnt++] = rstart + i; 98 goto ok2; 99 } 100 } 101 ok2:; 102 } 103 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 104 PetscFunctionReturn(0); 105 } 106 107 #undef __FUNCT__ 108 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ" 109 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 110 { 111 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 112 PetscErrorCode ierr; 113 PetscInt i,rstart,nrows,*rows; 114 115 PetscFunctionBegin; 116 *zrows = NULL; 117 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 118 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 119 for (i=0; i<nrows; i++) rows[i] += rstart; 120 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 121 PetscFunctionReturn(0); 122 } 123 124 #undef __FUNCT__ 125 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 126 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 127 { 128 PetscErrorCode ierr; 129 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 130 PetscInt i,n,*garray = aij->garray; 131 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 132 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 133 PetscReal *work; 134 135 PetscFunctionBegin; 136 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 137 ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr); 138 ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr); 139 if (type == NORM_2) { 140 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 141 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 142 } 143 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 144 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 145 } 146 } else if (type == NORM_1) { 147 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 148 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 149 } 150 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 151 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 152 } 153 } else if (type == NORM_INFINITY) { 154 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 155 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 156 } 157 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 158 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 159 } 160 161 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 162 if (type == NORM_INFINITY) { 163 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 164 } else { 165 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 166 } 167 ierr = PetscFree(work);CHKERRQ(ierr); 168 if (type == NORM_2) { 169 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 170 } 171 PetscFunctionReturn(0); 172 } 173 174 #undef __FUNCT__ 175 #define __FUNCT__ "MatDistribute_MPIAIJ" 176 /* 177 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 178 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 179 180 Only for square matrices 181 182 Used by a preconditioner, hence PETSC_EXTERN 183 */ 184 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 185 { 186 PetscMPIInt rank,size; 187 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2]; 188 PetscErrorCode ierr; 189 Mat mat; 190 Mat_SeqAIJ *gmata; 191 PetscMPIInt tag; 192 MPI_Status status; 193 PetscBool aij; 194 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 195 196 PetscFunctionBegin; 197 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 198 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 199 if (!rank) { 200 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 201 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 202 } 203 if (reuse == MAT_INITIAL_MATRIX) { 204 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 205 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 206 if (!rank) { 207 bses[0] = gmat->rmap->bs; 208 bses[1] = gmat->cmap->bs; 209 } 210 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 211 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 212 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 213 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 214 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 215 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 216 217 rowners[0] = 0; 218 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 219 rstart = rowners[rank]; 220 rend = rowners[rank+1]; 221 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 222 if (!rank) { 223 gmata = (Mat_SeqAIJ*) gmat->data; 224 /* send row lengths to all processors */ 225 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 226 for (i=1; i<size; i++) { 227 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 228 } 229 /* determine number diagonal and off-diagonal counts */ 230 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 231 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 232 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 233 jj = 0; 234 for (i=0; i<m; i++) { 235 for (j=0; j<dlens[i]; j++) { 236 if (gmata->j[jj] < rstart) ld[i]++; 237 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 238 jj++; 239 } 240 } 241 /* send column indices to other processes */ 242 for (i=1; i<size; i++) { 243 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 244 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 245 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 246 } 247 248 /* send numerical values to other processes */ 249 for (i=1; i<size; i++) { 250 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 251 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 252 } 253 gmataa = gmata->a; 254 gmataj = gmata->j; 255 256 } else { 257 /* receive row lengths */ 258 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 259 /* receive column indices */ 260 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 261 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 262 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 263 /* determine number diagonal and off-diagonal counts */ 264 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 265 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 266 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 267 jj = 0; 268 for (i=0; i<m; i++) { 269 for (j=0; j<dlens[i]; j++) { 270 if (gmataj[jj] < rstart) ld[i]++; 271 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 272 jj++; 273 } 274 } 275 /* receive numerical values */ 276 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 277 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 278 } 279 /* set preallocation */ 280 for (i=0; i<m; i++) { 281 dlens[i] -= olens[i]; 282 } 283 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 284 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 285 286 for (i=0; i<m; i++) { 287 dlens[i] += olens[i]; 288 } 289 cnt = 0; 290 for (i=0; i<m; i++) { 291 row = rstart + i; 292 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 293 cnt += dlens[i]; 294 } 295 if (rank) { 296 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 297 } 298 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 299 ierr = PetscFree(rowners);CHKERRQ(ierr); 300 301 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 302 303 *inmat = mat; 304 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 305 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 306 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 307 mat = *inmat; 308 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 309 if (!rank) { 310 /* send numerical values to other processes */ 311 gmata = (Mat_SeqAIJ*) gmat->data; 312 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 313 gmataa = gmata->a; 314 for (i=1; i<size; i++) { 315 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 316 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 317 } 318 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 319 } else { 320 /* receive numerical values from process 0*/ 321 nz = Ad->nz + Ao->nz; 322 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 323 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 324 } 325 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 326 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 327 ad = Ad->a; 328 ao = Ao->a; 329 if (mat->rmap->n) { 330 i = 0; 331 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 332 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 333 } 334 for (i=1; i<mat->rmap->n; i++) { 335 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 336 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 337 } 338 i--; 339 if (mat->rmap->n) { 340 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 341 } 342 if (rank) { 343 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 344 } 345 } 346 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 347 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 348 PetscFunctionReturn(0); 349 } 350 351 /* 352 Local utility routine that creates a mapping from the global column 353 number to the local number in the off-diagonal part of the local 354 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 355 a slightly higher hash table cost; without it it is not scalable (each processor 356 has an order N integer array but is fast to acess. 357 */ 358 #undef __FUNCT__ 359 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 360 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 361 { 362 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 363 PetscErrorCode ierr; 364 PetscInt n = aij->B->cmap->n,i; 365 366 PetscFunctionBegin; 367 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 368 #if defined(PETSC_USE_CTABLE) 369 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 370 for (i=0; i<n; i++) { 371 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 372 } 373 #else 374 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 375 ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 376 ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 377 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 378 #endif 379 PetscFunctionReturn(0); 380 } 381 382 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 383 { \ 384 if (col <= lastcol1) low1 = 0; \ 385 else high1 = nrow1; \ 386 lastcol1 = col;\ 387 while (high1-low1 > 5) { \ 388 t = (low1+high1)/2; \ 389 if (rp1[t] > col) high1 = t; \ 390 else low1 = t; \ 391 } \ 392 for (_i=low1; _i<high1; _i++) { \ 393 if (rp1[_i] > col) break; \ 394 if (rp1[_i] == col) { \ 395 if (addv == ADD_VALUES) ap1[_i] += value; \ 396 else ap1[_i] = value; \ 397 goto a_noinsert; \ 398 } \ 399 } \ 400 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 401 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 402 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 403 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 404 N = nrow1++ - 1; a->nz++; high1++; \ 405 /* shift up all the later entries in this row */ \ 406 for (ii=N; ii>=_i; ii--) { \ 407 rp1[ii+1] = rp1[ii]; \ 408 ap1[ii+1] = ap1[ii]; \ 409 } \ 410 rp1[_i] = col; \ 411 ap1[_i] = value; \ 412 a_noinsert: ; \ 413 ailen[row] = nrow1; \ 414 } 415 416 417 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 418 { \ 419 if (col <= lastcol2) low2 = 0; \ 420 else high2 = nrow2; \ 421 lastcol2 = col; \ 422 while (high2-low2 > 5) { \ 423 t = (low2+high2)/2; \ 424 if (rp2[t] > col) high2 = t; \ 425 else low2 = t; \ 426 } \ 427 for (_i=low2; _i<high2; _i++) { \ 428 if (rp2[_i] > col) break; \ 429 if (rp2[_i] == col) { \ 430 if (addv == ADD_VALUES) ap2[_i] += value; \ 431 else ap2[_i] = value; \ 432 goto b_noinsert; \ 433 } \ 434 } \ 435 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 436 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 437 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 438 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 439 N = nrow2++ - 1; b->nz++; high2++; \ 440 /* shift up all the later entries in this row */ \ 441 for (ii=N; ii>=_i; ii--) { \ 442 rp2[ii+1] = rp2[ii]; \ 443 ap2[ii+1] = ap2[ii]; \ 444 } \ 445 rp2[_i] = col; \ 446 ap2[_i] = value; \ 447 b_noinsert: ; \ 448 bilen[row] = nrow2; \ 449 } 450 451 #undef __FUNCT__ 452 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 453 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 454 { 455 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 456 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 457 PetscErrorCode ierr; 458 PetscInt l,*garray = mat->garray,diag; 459 460 PetscFunctionBegin; 461 /* code only works for square matrices A */ 462 463 /* find size of row to the left of the diagonal part */ 464 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 465 row = row - diag; 466 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 467 if (garray[b->j[b->i[row]+l]] > diag) break; 468 } 469 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 470 471 /* diagonal part */ 472 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 473 474 /* right of diagonal part */ 475 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 476 PetscFunctionReturn(0); 477 } 478 479 #undef __FUNCT__ 480 #define __FUNCT__ "MatSetValues_MPIAIJ" 481 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 482 { 483 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 484 PetscScalar value; 485 PetscErrorCode ierr; 486 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 487 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 488 PetscBool roworiented = aij->roworiented; 489 490 /* Some Variables required in the macro */ 491 Mat A = aij->A; 492 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 493 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 494 MatScalar *aa = a->a; 495 PetscBool ignorezeroentries = a->ignorezeroentries; 496 Mat B = aij->B; 497 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 498 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 499 MatScalar *ba = b->a; 500 501 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 502 PetscInt nonew; 503 MatScalar *ap1,*ap2; 504 505 PetscFunctionBegin; 506 if (v) PetscValidScalarPointer(v,6); 507 for (i=0; i<m; i++) { 508 if (im[i] < 0) continue; 509 #if defined(PETSC_USE_DEBUG) 510 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 511 #endif 512 if (im[i] >= rstart && im[i] < rend) { 513 row = im[i] - rstart; 514 lastcol1 = -1; 515 rp1 = aj + ai[row]; 516 ap1 = aa + ai[row]; 517 rmax1 = aimax[row]; 518 nrow1 = ailen[row]; 519 low1 = 0; 520 high1 = nrow1; 521 lastcol2 = -1; 522 rp2 = bj + bi[row]; 523 ap2 = ba + bi[row]; 524 rmax2 = bimax[row]; 525 nrow2 = bilen[row]; 526 low2 = 0; 527 high2 = nrow2; 528 529 for (j=0; j<n; j++) { 530 if (v) { 531 if (roworiented) value = v[i*n+j]; 532 else value = v[i+j*m]; 533 } else value = 0.0; 534 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 535 if (in[j] >= cstart && in[j] < cend) { 536 col = in[j] - cstart; 537 nonew = a->nonew; 538 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 539 } else if (in[j] < 0) continue; 540 #if defined(PETSC_USE_DEBUG) 541 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 542 #endif 543 else { 544 if (mat->was_assembled) { 545 if (!aij->colmap) { 546 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 547 } 548 #if defined(PETSC_USE_CTABLE) 549 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 550 col--; 551 #else 552 col = aij->colmap[in[j]] - 1; 553 #endif 554 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 555 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 556 col = in[j]; 557 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 558 B = aij->B; 559 b = (Mat_SeqAIJ*)B->data; 560 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 561 rp2 = bj + bi[row]; 562 ap2 = ba + bi[row]; 563 rmax2 = bimax[row]; 564 nrow2 = bilen[row]; 565 low2 = 0; 566 high2 = nrow2; 567 bm = aij->B->rmap->n; 568 ba = b->a; 569 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 570 } else col = in[j]; 571 nonew = b->nonew; 572 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 573 } 574 } 575 } else { 576 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 577 if (!aij->donotstash) { 578 mat->assembled = PETSC_FALSE; 579 if (roworiented) { 580 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 581 } else { 582 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 583 } 584 } 585 } 586 } 587 PetscFunctionReturn(0); 588 } 589 590 #undef __FUNCT__ 591 #define __FUNCT__ "MatGetValues_MPIAIJ" 592 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 593 { 594 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 595 PetscErrorCode ierr; 596 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 597 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 598 599 PetscFunctionBegin; 600 for (i=0; i<m; i++) { 601 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 602 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 603 if (idxm[i] >= rstart && idxm[i] < rend) { 604 row = idxm[i] - rstart; 605 for (j=0; j<n; j++) { 606 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 607 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 608 if (idxn[j] >= cstart && idxn[j] < cend) { 609 col = idxn[j] - cstart; 610 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 611 } else { 612 if (!aij->colmap) { 613 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 614 } 615 #if defined(PETSC_USE_CTABLE) 616 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 617 col--; 618 #else 619 col = aij->colmap[idxn[j]] - 1; 620 #endif 621 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 622 else { 623 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 624 } 625 } 626 } 627 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 628 } 629 PetscFunctionReturn(0); 630 } 631 632 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 633 634 #undef __FUNCT__ 635 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 636 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 637 { 638 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 639 PetscErrorCode ierr; 640 PetscInt nstash,reallocs; 641 InsertMode addv; 642 643 PetscFunctionBegin; 644 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 645 646 /* make sure all processors are either in INSERTMODE or ADDMODE */ 647 ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 648 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 649 mat->insertmode = addv; /* in case this processor had no cache */ 650 651 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 652 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 653 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 654 PetscFunctionReturn(0); 655 } 656 657 #undef __FUNCT__ 658 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 659 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 660 { 661 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 662 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 663 PetscErrorCode ierr; 664 PetscMPIInt n; 665 PetscInt i,j,rstart,ncols,flg; 666 PetscInt *row,*col; 667 PetscBool other_disassembled; 668 PetscScalar *val; 669 InsertMode addv = mat->insertmode; 670 671 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 672 673 PetscFunctionBegin; 674 if (!aij->donotstash && !mat->nooffprocentries) { 675 while (1) { 676 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 677 if (!flg) break; 678 679 for (i=0; i<n; ) { 680 /* Now identify the consecutive vals belonging to the same row */ 681 for (j=i,rstart=row[j]; j<n; j++) { 682 if (row[j] != rstart) break; 683 } 684 if (j < n) ncols = j-i; 685 else ncols = n-i; 686 /* Now assemble all these values with a single function call */ 687 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 688 689 i = j; 690 } 691 } 692 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 693 } 694 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 695 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 696 697 /* determine if any processor has disassembled, if so we must 698 also disassemble ourselfs, in order that we may reassemble. */ 699 /* 700 if nonzero structure of submatrix B cannot change then we know that 701 no processor disassembled thus we can skip this stuff 702 */ 703 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 704 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 705 if (mat->was_assembled && !other_disassembled) { 706 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 707 } 708 } 709 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 710 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 711 } 712 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 713 ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 714 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 715 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 716 717 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 718 719 aij->rowvalues = 0; 720 721 /* used by MatAXPY() */ 722 a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 723 a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 724 725 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 726 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 727 PetscFunctionReturn(0); 728 } 729 730 #undef __FUNCT__ 731 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 732 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 733 { 734 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 735 PetscErrorCode ierr; 736 737 PetscFunctionBegin; 738 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 739 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 740 PetscFunctionReturn(0); 741 } 742 743 #undef __FUNCT__ 744 #define __FUNCT__ "MatZeroRows_MPIAIJ" 745 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 746 { 747 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 748 PetscErrorCode ierr; 749 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 750 PetscInt i,*owners = A->rmap->range; 751 PetscInt *nprocs,j,idx,nsends,row; 752 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 753 PetscInt *rvalues,count,base,slen,*source; 754 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 755 MPI_Comm comm; 756 MPI_Request *send_waits,*recv_waits; 757 MPI_Status recv_status,*send_status; 758 const PetscScalar *xx; 759 PetscScalar *bb; 760 #if defined(PETSC_DEBUG) 761 PetscBool found = PETSC_FALSE; 762 #endif 763 764 PetscFunctionBegin; 765 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 766 /* first count number of contributors to each processor */ 767 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 768 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 769 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 770 j = 0; 771 for (i=0; i<N; i++) { 772 if (lastidx > (idx = rows[i])) j = 0; 773 lastidx = idx; 774 for (; j<size; j++) { 775 if (idx >= owners[j] && idx < owners[j+1]) { 776 nprocs[2*j]++; 777 nprocs[2*j+1] = 1; 778 owner[i] = j; 779 #if defined(PETSC_DEBUG) 780 found = PETSC_TRUE; 781 #endif 782 break; 783 } 784 } 785 #if defined(PETSC_DEBUG) 786 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 787 found = PETSC_FALSE; 788 #endif 789 } 790 nsends = 0; 791 for (i=0; i<size; i++) nsends += nprocs[2*i+1]; 792 793 if (A->nooffproczerorows) { 794 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 795 nrecvs = nsends; 796 nmax = N; 797 } else { 798 /* inform other processors of number of messages and max length*/ 799 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 800 } 801 802 /* post receives: */ 803 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 804 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 805 for (i=0; i<nrecvs; i++) { 806 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 807 } 808 809 /* do sends: 810 1) starts[i] gives the starting index in svalues for stuff going to 811 the ith processor 812 */ 813 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 814 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 815 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 816 817 starts[0] = 0; 818 for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 819 for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i]; 820 821 starts[0] = 0; 822 for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 823 count = 0; 824 for (i=0; i<size; i++) { 825 if (nprocs[2*i+1]) { 826 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 827 } 828 } 829 ierr = PetscFree(starts);CHKERRQ(ierr); 830 831 base = owners[rank]; 832 833 /* wait on receives */ 834 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 835 count = nrecvs; slen = 0; 836 while (count) { 837 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 838 /* unpack receives into our local space */ 839 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 840 841 source[imdex] = recv_status.MPI_SOURCE; 842 lens[imdex] = n; 843 slen += n; 844 count--; 845 } 846 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 847 848 /* move the data into the send scatter */ 849 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 850 count = 0; 851 for (i=0; i<nrecvs; i++) { 852 values = rvalues + i*nmax; 853 for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base; 854 } 855 ierr = PetscFree(rvalues);CHKERRQ(ierr); 856 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 857 ierr = PetscFree(owner);CHKERRQ(ierr); 858 ierr = PetscFree(nprocs);CHKERRQ(ierr); 859 860 /* fix right hand side if needed */ 861 if (x && b) { 862 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 863 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 864 for (i=0; i<slen; i++) bb[lrows[i]] = diag*xx[lrows[i]]; 865 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 866 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 867 } 868 /* 869 Zero the required rows. If the "diagonal block" of the matrix 870 is square and the user wishes to set the diagonal we use separate 871 code so that MatSetValues() is not called for each diagonal allocating 872 new memory, thus calling lots of mallocs and slowing things down. 873 874 */ 875 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 876 ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 877 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 878 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 879 } else if (diag != 0.0) { 880 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 881 if (((Mat_SeqAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 882 for (i = 0; i < slen; i++) { 883 row = lrows[i] + rstart; 884 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 885 } 886 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 887 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 888 } else { 889 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 890 } 891 ierr = PetscFree(lrows);CHKERRQ(ierr); 892 893 /* wait on sends */ 894 if (nsends) { 895 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 896 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 897 ierr = PetscFree(send_status);CHKERRQ(ierr); 898 } 899 ierr = PetscFree(send_waits);CHKERRQ(ierr); 900 ierr = PetscFree(svalues);CHKERRQ(ierr); 901 PetscFunctionReturn(0); 902 } 903 904 #undef __FUNCT__ 905 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 906 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 907 { 908 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 909 PetscErrorCode ierr; 910 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 911 PetscInt i,*owners = A->rmap->range; 912 PetscInt *nprocs,j,idx,nsends; 913 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 914 PetscInt *rvalues,count,base,slen,*source; 915 PetscInt *lens,*lrows,*values,m; 916 MPI_Comm comm; 917 MPI_Request *send_waits,*recv_waits; 918 MPI_Status recv_status,*send_status; 919 const PetscScalar *xx; 920 PetscScalar *bb,*mask; 921 Vec xmask,lmask; 922 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 923 const PetscInt *aj, *ii,*ridx; 924 PetscScalar *aa; 925 #if defined(PETSC_DEBUG) 926 PetscBool found = PETSC_FALSE; 927 #endif 928 929 PetscFunctionBegin; 930 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 931 /* first count number of contributors to each processor */ 932 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 933 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 934 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 935 j = 0; 936 for (i=0; i<N; i++) { 937 if (lastidx > (idx = rows[i])) j = 0; 938 lastidx = idx; 939 for (; j<size; j++) { 940 if (idx >= owners[j] && idx < owners[j+1]) { 941 nprocs[2*j]++; 942 nprocs[2*j+1] = 1; 943 owner[i] = j; 944 #if defined(PETSC_DEBUG) 945 found = PETSC_TRUE; 946 #endif 947 break; 948 } 949 } 950 #if defined(PETSC_DEBUG) 951 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 952 found = PETSC_FALSE; 953 #endif 954 } 955 nsends = 0; for (i=0; i<size; i++) nsends += nprocs[2*i+1]; 956 957 /* inform other processors of number of messages and max length*/ 958 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 959 960 /* post receives: */ 961 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 962 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 963 for (i=0; i<nrecvs; i++) { 964 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 965 } 966 967 /* do sends: 968 1) starts[i] gives the starting index in svalues for stuff going to 969 the ith processor 970 */ 971 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 972 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 973 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 974 975 starts[0] = 0; 976 for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 977 for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i]; 978 979 starts[0] = 0; 980 for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2]; 981 count = 0; 982 for (i=0; i<size; i++) { 983 if (nprocs[2*i+1]) { 984 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 985 } 986 } 987 ierr = PetscFree(starts);CHKERRQ(ierr); 988 989 base = owners[rank]; 990 991 /* wait on receives */ 992 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 993 count = nrecvs; slen = 0; 994 while (count) { 995 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 996 /* unpack receives into our local space */ 997 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 998 999 source[imdex] = recv_status.MPI_SOURCE; 1000 lens[imdex] = n; 1001 slen += n; 1002 count--; 1003 } 1004 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1005 1006 /* move the data into the send scatter */ 1007 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 1008 count = 0; 1009 for (i=0; i<nrecvs; i++) { 1010 values = rvalues + i*nmax; 1011 for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base; 1012 } 1013 ierr = PetscFree(rvalues);CHKERRQ(ierr); 1014 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 1015 ierr = PetscFree(owner);CHKERRQ(ierr); 1016 ierr = PetscFree(nprocs);CHKERRQ(ierr); 1017 /* lrows are the local rows to be zeroed, slen is the number of local rows */ 1018 1019 /* zero diagonal part of matrix */ 1020 ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr); 1021 1022 /* handle off diagonal part of matrix */ 1023 ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr); 1024 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1025 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1026 for (i=0; i<slen; i++) bb[lrows[i]] = 1; 1027 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1028 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1029 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1030 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1031 if (x) { 1032 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1033 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1034 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1035 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1036 } 1037 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1038 1039 /* remove zeroed rows of off diagonal matrix */ 1040 ii = aij->i; 1041 for (i=0; i<slen; i++) { 1042 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 1043 } 1044 1045 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1046 if (aij->compressedrow.use) { 1047 m = aij->compressedrow.nrows; 1048 ii = aij->compressedrow.i; 1049 ridx = aij->compressedrow.rindex; 1050 for (i=0; i<m; i++) { 1051 n = ii[i+1] - ii[i]; 1052 aj = aij->j + ii[i]; 1053 aa = aij->a + ii[i]; 1054 1055 for (j=0; j<n; j++) { 1056 if (PetscAbsScalar(mask[*aj])) { 1057 if (b) bb[*ridx] -= *aa*xx[*aj]; 1058 *aa = 0.0; 1059 } 1060 aa++; 1061 aj++; 1062 } 1063 ridx++; 1064 } 1065 } else { /* do not use compressed row format */ 1066 m = l->B->rmap->n; 1067 for (i=0; i<m; i++) { 1068 n = ii[i+1] - ii[i]; 1069 aj = aij->j + ii[i]; 1070 aa = aij->a + ii[i]; 1071 for (j=0; j<n; j++) { 1072 if (PetscAbsScalar(mask[*aj])) { 1073 if (b) bb[i] -= *aa*xx[*aj]; 1074 *aa = 0.0; 1075 } 1076 aa++; 1077 aj++; 1078 } 1079 } 1080 } 1081 if (x) { 1082 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1083 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1084 } 1085 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1086 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1087 ierr = PetscFree(lrows);CHKERRQ(ierr); 1088 1089 /* wait on sends */ 1090 if (nsends) { 1091 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1092 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1093 ierr = PetscFree(send_status);CHKERRQ(ierr); 1094 } 1095 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1096 ierr = PetscFree(svalues);CHKERRQ(ierr); 1097 PetscFunctionReturn(0); 1098 } 1099 1100 #undef __FUNCT__ 1101 #define __FUNCT__ "MatMult_MPIAIJ" 1102 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1103 { 1104 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1105 PetscErrorCode ierr; 1106 PetscInt nt; 1107 1108 PetscFunctionBegin; 1109 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1110 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1111 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1112 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1113 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1114 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1115 PetscFunctionReturn(0); 1116 } 1117 1118 #undef __FUNCT__ 1119 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 1120 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1121 { 1122 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1123 PetscErrorCode ierr; 1124 1125 PetscFunctionBegin; 1126 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1127 PetscFunctionReturn(0); 1128 } 1129 1130 #undef __FUNCT__ 1131 #define __FUNCT__ "MatMultAdd_MPIAIJ" 1132 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1133 { 1134 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1135 PetscErrorCode ierr; 1136 1137 PetscFunctionBegin; 1138 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1139 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1140 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1141 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1142 PetscFunctionReturn(0); 1143 } 1144 1145 #undef __FUNCT__ 1146 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1147 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1148 { 1149 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1150 PetscErrorCode ierr; 1151 PetscBool merged; 1152 1153 PetscFunctionBegin; 1154 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1155 /* do nondiagonal part */ 1156 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1157 if (!merged) { 1158 /* send it on its way */ 1159 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1160 /* do local part */ 1161 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1162 /* receive remote parts: note this assumes the values are not actually */ 1163 /* added in yy until the next line, */ 1164 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1165 } else { 1166 /* do local part */ 1167 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1168 /* send it on its way */ 1169 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1170 /* values actually were received in the Begin() but we need to call this nop */ 1171 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1172 } 1173 PetscFunctionReturn(0); 1174 } 1175 1176 #undef __FUNCT__ 1177 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1178 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1179 { 1180 MPI_Comm comm; 1181 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1182 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1183 IS Me,Notme; 1184 PetscErrorCode ierr; 1185 PetscInt M,N,first,last,*notme,i; 1186 PetscMPIInt size; 1187 1188 PetscFunctionBegin; 1189 /* Easy test: symmetric diagonal block */ 1190 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1191 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1192 if (!*f) PetscFunctionReturn(0); 1193 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1194 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1195 if (size == 1) PetscFunctionReturn(0); 1196 1197 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 1198 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1199 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1200 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 1201 for (i=0; i<first; i++) notme[i] = i; 1202 for (i=last; i<M; i++) notme[i-last+first] = i; 1203 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1204 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1205 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1206 Aoff = Aoffs[0]; 1207 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1208 Boff = Boffs[0]; 1209 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1210 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1211 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1212 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1213 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1214 ierr = PetscFree(notme);CHKERRQ(ierr); 1215 PetscFunctionReturn(0); 1216 } 1217 1218 #undef __FUNCT__ 1219 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1220 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1221 { 1222 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1223 PetscErrorCode ierr; 1224 1225 PetscFunctionBegin; 1226 /* do nondiagonal part */ 1227 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1228 /* send it on its way */ 1229 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1230 /* do local part */ 1231 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1232 /* receive remote parts */ 1233 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1234 PetscFunctionReturn(0); 1235 } 1236 1237 /* 1238 This only works correctly for square matrices where the subblock A->A is the 1239 diagonal block 1240 */ 1241 #undef __FUNCT__ 1242 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1243 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1244 { 1245 PetscErrorCode ierr; 1246 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1247 1248 PetscFunctionBegin; 1249 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1250 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1251 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1252 PetscFunctionReturn(0); 1253 } 1254 1255 #undef __FUNCT__ 1256 #define __FUNCT__ "MatScale_MPIAIJ" 1257 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1258 { 1259 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1260 PetscErrorCode ierr; 1261 1262 PetscFunctionBegin; 1263 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1264 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1265 PetscFunctionReturn(0); 1266 } 1267 1268 #undef __FUNCT__ 1269 #define __FUNCT__ "MatDestroy_MPIAIJ" 1270 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1271 { 1272 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1273 PetscErrorCode ierr; 1274 1275 PetscFunctionBegin; 1276 #if defined(PETSC_USE_LOG) 1277 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1278 #endif 1279 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1280 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1281 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1282 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1283 #if defined(PETSC_USE_CTABLE) 1284 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1285 #else 1286 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1287 #endif 1288 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1289 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1290 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1291 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1292 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1293 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1294 1295 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1296 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1297 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1298 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 1299 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1300 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1301 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1302 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1303 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1304 PetscFunctionReturn(0); 1305 } 1306 1307 #undef __FUNCT__ 1308 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1309 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1310 { 1311 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1312 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1313 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1314 PetscErrorCode ierr; 1315 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1316 int fd; 1317 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1318 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 1319 PetscScalar *column_values; 1320 PetscInt message_count,flowcontrolcount; 1321 FILE *file; 1322 1323 PetscFunctionBegin; 1324 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1325 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1326 nz = A->nz + B->nz; 1327 if (!rank) { 1328 header[0] = MAT_FILE_CLASSID; 1329 header[1] = mat->rmap->N; 1330 header[2] = mat->cmap->N; 1331 1332 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1333 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1334 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1335 /* get largest number of rows any processor has */ 1336 rlen = mat->rmap->n; 1337 range = mat->rmap->range; 1338 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1339 } else { 1340 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1341 rlen = mat->rmap->n; 1342 } 1343 1344 /* load up the local row counts */ 1345 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 1346 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1347 1348 /* store the row lengths to the file */ 1349 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1350 if (!rank) { 1351 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1352 for (i=1; i<size; i++) { 1353 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1354 rlen = range[i+1] - range[i]; 1355 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1356 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1357 } 1358 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1359 } else { 1360 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1361 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1362 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1363 } 1364 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1365 1366 /* load up the local column indices */ 1367 nzmax = nz; /* th processor needs space a largest processor needs */ 1368 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1369 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1370 cnt = 0; 1371 for (i=0; i<mat->rmap->n; i++) { 1372 for (j=B->i[i]; j<B->i[i+1]; j++) { 1373 if ((col = garray[B->j[j]]) > cstart) break; 1374 column_indices[cnt++] = col; 1375 } 1376 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1377 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1378 } 1379 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1380 1381 /* store the column indices to the file */ 1382 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1383 if (!rank) { 1384 MPI_Status status; 1385 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1386 for (i=1; i<size; i++) { 1387 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1388 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1389 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1390 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1391 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1392 } 1393 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1394 } else { 1395 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1396 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1397 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1398 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1399 } 1400 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1401 1402 /* load up the local column values */ 1403 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1404 cnt = 0; 1405 for (i=0; i<mat->rmap->n; i++) { 1406 for (j=B->i[i]; j<B->i[i+1]; j++) { 1407 if (garray[B->j[j]] > cstart) break; 1408 column_values[cnt++] = B->a[j]; 1409 } 1410 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1411 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1412 } 1413 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1414 1415 /* store the column values to the file */ 1416 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1417 if (!rank) { 1418 MPI_Status status; 1419 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1420 for (i=1; i<size; i++) { 1421 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1422 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1423 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1424 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1425 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1426 } 1427 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1428 } else { 1429 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1430 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1431 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1432 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1433 } 1434 ierr = PetscFree(column_values);CHKERRQ(ierr); 1435 1436 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1437 if (file) fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1438 PetscFunctionReturn(0); 1439 } 1440 1441 #include <petscdraw.h> 1442 #undef __FUNCT__ 1443 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1444 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1445 { 1446 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1447 PetscErrorCode ierr; 1448 PetscMPIInt rank = aij->rank,size = aij->size; 1449 PetscBool isdraw,iascii,isbinary; 1450 PetscViewer sviewer; 1451 PetscViewerFormat format; 1452 1453 PetscFunctionBegin; 1454 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1455 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1456 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1457 if (iascii) { 1458 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1459 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1460 MatInfo info; 1461 PetscBool inodes; 1462 1463 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1464 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1465 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1466 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1467 if (!inodes) { 1468 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1469 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1470 } else { 1471 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1472 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1473 } 1474 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1475 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1476 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1477 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1478 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1479 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1480 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1481 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1482 PetscFunctionReturn(0); 1483 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1484 PetscInt inodecount,inodelimit,*inodes; 1485 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1486 if (inodes) { 1487 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1488 } else { 1489 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1490 } 1491 PetscFunctionReturn(0); 1492 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1493 PetscFunctionReturn(0); 1494 } 1495 } else if (isbinary) { 1496 if (size == 1) { 1497 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1498 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1499 } else { 1500 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1501 } 1502 PetscFunctionReturn(0); 1503 } else if (isdraw) { 1504 PetscDraw draw; 1505 PetscBool isnull; 1506 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1507 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1508 } 1509 1510 if (size == 1) { 1511 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1512 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1513 } else { 1514 /* assemble the entire matrix onto first processor. */ 1515 Mat A; 1516 Mat_SeqAIJ *Aloc; 1517 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1518 MatScalar *a; 1519 1520 if (mat->rmap->N > 1024) { 1521 PetscBool flg = PETSC_FALSE; 1522 1523 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr); 1524 if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1525 } 1526 1527 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1528 if (!rank) { 1529 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1530 } else { 1531 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1532 } 1533 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1534 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1535 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1536 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1537 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1538 1539 /* copy over the A part */ 1540 Aloc = (Mat_SeqAIJ*)aij->A->data; 1541 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1542 row = mat->rmap->rstart; 1543 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1544 for (i=0; i<m; i++) { 1545 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1546 row++; 1547 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1548 } 1549 aj = Aloc->j; 1550 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1551 1552 /* copy over the B part */ 1553 Aloc = (Mat_SeqAIJ*)aij->B->data; 1554 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1555 row = mat->rmap->rstart; 1556 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1557 ct = cols; 1558 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1559 for (i=0; i<m; i++) { 1560 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1561 row++; 1562 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1563 } 1564 ierr = PetscFree(ct);CHKERRQ(ierr); 1565 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1566 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1567 /* 1568 Everyone has to call to draw the matrix since the graphics waits are 1569 synchronized across all processors that share the PetscDraw object 1570 */ 1571 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1572 if (!rank) { 1573 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1574 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1575 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1576 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1577 } 1578 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1579 ierr = MatDestroy(&A);CHKERRQ(ierr); 1580 } 1581 PetscFunctionReturn(0); 1582 } 1583 1584 #undef __FUNCT__ 1585 #define __FUNCT__ "MatView_MPIAIJ" 1586 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1587 { 1588 PetscErrorCode ierr; 1589 PetscBool iascii,isdraw,issocket,isbinary; 1590 1591 PetscFunctionBegin; 1592 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1593 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1594 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1595 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1596 if (iascii || isdraw || isbinary || issocket) { 1597 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1598 } 1599 PetscFunctionReturn(0); 1600 } 1601 1602 #undef __FUNCT__ 1603 #define __FUNCT__ "MatSOR_MPIAIJ" 1604 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1605 { 1606 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1607 PetscErrorCode ierr; 1608 Vec bb1 = 0; 1609 PetscBool hasop; 1610 1611 PetscFunctionBegin; 1612 if (flag == SOR_APPLY_UPPER) { 1613 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1614 PetscFunctionReturn(0); 1615 } 1616 1617 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1618 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1619 } 1620 1621 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1622 if (flag & SOR_ZERO_INITIAL_GUESS) { 1623 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1624 its--; 1625 } 1626 1627 while (its--) { 1628 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1629 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1630 1631 /* update rhs: bb1 = bb - B*x */ 1632 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1633 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1634 1635 /* local sweep */ 1636 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1637 } 1638 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1639 if (flag & SOR_ZERO_INITIAL_GUESS) { 1640 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1641 its--; 1642 } 1643 while (its--) { 1644 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1645 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1646 1647 /* update rhs: bb1 = bb - B*x */ 1648 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1649 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1650 1651 /* local sweep */ 1652 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1653 } 1654 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1655 if (flag & SOR_ZERO_INITIAL_GUESS) { 1656 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1657 its--; 1658 } 1659 while (its--) { 1660 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1661 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1662 1663 /* update rhs: bb1 = bb - B*x */ 1664 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1665 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1666 1667 /* local sweep */ 1668 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1669 } 1670 } else if (flag & SOR_EISENSTAT) { 1671 Vec xx1; 1672 1673 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1674 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1675 1676 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1677 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1678 if (!mat->diag) { 1679 ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1680 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1681 } 1682 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1683 if (hasop) { 1684 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1685 } else { 1686 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1687 } 1688 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1689 1690 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1691 1692 /* local sweep */ 1693 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1694 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1695 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1696 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1697 1698 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1699 PetscFunctionReturn(0); 1700 } 1701 1702 #undef __FUNCT__ 1703 #define __FUNCT__ "MatPermute_MPIAIJ" 1704 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1705 { 1706 Mat aA,aB,Aperm; 1707 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1708 PetscScalar *aa,*ba; 1709 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1710 PetscSF rowsf,sf; 1711 IS parcolp = NULL; 1712 PetscBool done; 1713 PetscErrorCode ierr; 1714 1715 PetscFunctionBegin; 1716 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1717 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1718 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1719 ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr); 1720 1721 /* Invert row permutation to find out where my rows should go */ 1722 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1723 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1724 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1725 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1726 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1727 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1728 1729 /* Invert column permutation to find out where my columns should go */ 1730 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1731 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1732 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1733 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1734 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1735 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1736 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1737 1738 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1739 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1740 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1741 1742 /* Find out where my gcols should go */ 1743 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1744 ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr); 1745 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1746 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1747 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1748 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1749 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1750 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1751 1752 ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr); 1753 ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1754 ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1755 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1756 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1757 for (i=0; i<m; i++) { 1758 PetscInt row = rdest[i],rowner; 1759 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1760 for (j=ai[i]; j<ai[i+1]; j++) { 1761 PetscInt cowner,col = cdest[aj[j]]; 1762 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1763 if (rowner == cowner) dnnz[i]++; 1764 else onnz[i]++; 1765 } 1766 for (j=bi[i]; j<bi[i+1]; j++) { 1767 PetscInt cowner,col = gcdest[bj[j]]; 1768 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1769 if (rowner == cowner) dnnz[i]++; 1770 else onnz[i]++; 1771 } 1772 } 1773 ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1774 ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr); 1775 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1776 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1777 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1778 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1779 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1780 1781 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1782 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1783 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1784 for (i=0; i<m; i++) { 1785 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1786 PetscInt rowlen; 1787 rowlen = ai[i+1] - ai[i]; 1788 for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]]; 1789 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr); 1790 rowlen = bi[i+1] - bi[i]; 1791 for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]]; 1792 ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr); 1793 } 1794 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1795 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1796 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1797 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1798 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1799 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1800 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1801 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1802 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1803 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1804 *B = Aperm; 1805 PetscFunctionReturn(0); 1806 } 1807 1808 #undef __FUNCT__ 1809 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1810 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1811 { 1812 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1813 Mat A = mat->A,B = mat->B; 1814 PetscErrorCode ierr; 1815 PetscReal isend[5],irecv[5]; 1816 1817 PetscFunctionBegin; 1818 info->block_size = 1.0; 1819 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1820 1821 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1822 isend[3] = info->memory; isend[4] = info->mallocs; 1823 1824 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1825 1826 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1827 isend[3] += info->memory; isend[4] += info->mallocs; 1828 if (flag == MAT_LOCAL) { 1829 info->nz_used = isend[0]; 1830 info->nz_allocated = isend[1]; 1831 info->nz_unneeded = isend[2]; 1832 info->memory = isend[3]; 1833 info->mallocs = isend[4]; 1834 } else if (flag == MAT_GLOBAL_MAX) { 1835 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1836 1837 info->nz_used = irecv[0]; 1838 info->nz_allocated = irecv[1]; 1839 info->nz_unneeded = irecv[2]; 1840 info->memory = irecv[3]; 1841 info->mallocs = irecv[4]; 1842 } else if (flag == MAT_GLOBAL_SUM) { 1843 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1844 1845 info->nz_used = irecv[0]; 1846 info->nz_allocated = irecv[1]; 1847 info->nz_unneeded = irecv[2]; 1848 info->memory = irecv[3]; 1849 info->mallocs = irecv[4]; 1850 } 1851 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1852 info->fill_ratio_needed = 0; 1853 info->factor_mallocs = 0; 1854 PetscFunctionReturn(0); 1855 } 1856 1857 #undef __FUNCT__ 1858 #define __FUNCT__ "MatSetOption_MPIAIJ" 1859 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1860 { 1861 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1862 PetscErrorCode ierr; 1863 1864 PetscFunctionBegin; 1865 switch (op) { 1866 case MAT_NEW_NONZERO_LOCATIONS: 1867 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1868 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1869 case MAT_KEEP_NONZERO_PATTERN: 1870 case MAT_NEW_NONZERO_LOCATION_ERR: 1871 case MAT_USE_INODES: 1872 case MAT_IGNORE_ZERO_ENTRIES: 1873 MatCheckPreallocated(A,1); 1874 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1875 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1876 break; 1877 case MAT_ROW_ORIENTED: 1878 a->roworiented = flg; 1879 1880 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1881 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1882 break; 1883 case MAT_NEW_DIAGONALS: 1884 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1885 break; 1886 case MAT_IGNORE_OFF_PROC_ENTRIES: 1887 a->donotstash = flg; 1888 break; 1889 case MAT_SPD: 1890 A->spd_set = PETSC_TRUE; 1891 A->spd = flg; 1892 if (flg) { 1893 A->symmetric = PETSC_TRUE; 1894 A->structurally_symmetric = PETSC_TRUE; 1895 A->symmetric_set = PETSC_TRUE; 1896 A->structurally_symmetric_set = PETSC_TRUE; 1897 } 1898 break; 1899 case MAT_SYMMETRIC: 1900 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1901 break; 1902 case MAT_STRUCTURALLY_SYMMETRIC: 1903 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1904 break; 1905 case MAT_HERMITIAN: 1906 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1907 break; 1908 case MAT_SYMMETRY_ETERNAL: 1909 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1910 break; 1911 default: 1912 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1913 } 1914 PetscFunctionReturn(0); 1915 } 1916 1917 #undef __FUNCT__ 1918 #define __FUNCT__ "MatGetRow_MPIAIJ" 1919 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1920 { 1921 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1922 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1923 PetscErrorCode ierr; 1924 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1925 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1926 PetscInt *cmap,*idx_p; 1927 1928 PetscFunctionBegin; 1929 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1930 mat->getrowactive = PETSC_TRUE; 1931 1932 if (!mat->rowvalues && (idx || v)) { 1933 /* 1934 allocate enough space to hold information from the longest row. 1935 */ 1936 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1937 PetscInt max = 1,tmp; 1938 for (i=0; i<matin->rmap->n; i++) { 1939 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1940 if (max < tmp) max = tmp; 1941 } 1942 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1943 } 1944 1945 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1946 lrow = row - rstart; 1947 1948 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1949 if (!v) {pvA = 0; pvB = 0;} 1950 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1951 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1952 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1953 nztot = nzA + nzB; 1954 1955 cmap = mat->garray; 1956 if (v || idx) { 1957 if (nztot) { 1958 /* Sort by increasing column numbers, assuming A and B already sorted */ 1959 PetscInt imark = -1; 1960 if (v) { 1961 *v = v_p = mat->rowvalues; 1962 for (i=0; i<nzB; i++) { 1963 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1964 else break; 1965 } 1966 imark = i; 1967 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1968 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1969 } 1970 if (idx) { 1971 *idx = idx_p = mat->rowindices; 1972 if (imark > -1) { 1973 for (i=0; i<imark; i++) { 1974 idx_p[i] = cmap[cworkB[i]]; 1975 } 1976 } else { 1977 for (i=0; i<nzB; i++) { 1978 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1979 else break; 1980 } 1981 imark = i; 1982 } 1983 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1984 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1985 } 1986 } else { 1987 if (idx) *idx = 0; 1988 if (v) *v = 0; 1989 } 1990 } 1991 *nz = nztot; 1992 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1993 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1994 PetscFunctionReturn(0); 1995 } 1996 1997 #undef __FUNCT__ 1998 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1999 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 2000 { 2001 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2002 2003 PetscFunctionBegin; 2004 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 2005 aij->getrowactive = PETSC_FALSE; 2006 PetscFunctionReturn(0); 2007 } 2008 2009 #undef __FUNCT__ 2010 #define __FUNCT__ "MatNorm_MPIAIJ" 2011 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 2012 { 2013 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2014 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 2015 PetscErrorCode ierr; 2016 PetscInt i,j,cstart = mat->cmap->rstart; 2017 PetscReal sum = 0.0; 2018 MatScalar *v; 2019 2020 PetscFunctionBegin; 2021 if (aij->size == 1) { 2022 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 2023 } else { 2024 if (type == NORM_FROBENIUS) { 2025 v = amat->a; 2026 for (i=0; i<amat->nz; i++) { 2027 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2028 } 2029 v = bmat->a; 2030 for (i=0; i<bmat->nz; i++) { 2031 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 2032 } 2033 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2034 *norm = PetscSqrtReal(*norm); 2035 } else if (type == NORM_1) { /* max column norm */ 2036 PetscReal *tmp,*tmp2; 2037 PetscInt *jj,*garray = aij->garray; 2038 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 2039 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 2040 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 2041 *norm = 0.0; 2042 v = amat->a; jj = amat->j; 2043 for (j=0; j<amat->nz; j++) { 2044 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 2045 } 2046 v = bmat->a; jj = bmat->j; 2047 for (j=0; j<bmat->nz; j++) { 2048 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 2049 } 2050 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2051 for (j=0; j<mat->cmap->N; j++) { 2052 if (tmp2[j] > *norm) *norm = tmp2[j]; 2053 } 2054 ierr = PetscFree(tmp);CHKERRQ(ierr); 2055 ierr = PetscFree(tmp2);CHKERRQ(ierr); 2056 } else if (type == NORM_INFINITY) { /* max row norm */ 2057 PetscReal ntemp = 0.0; 2058 for (j=0; j<aij->A->rmap->n; j++) { 2059 v = amat->a + amat->i[j]; 2060 sum = 0.0; 2061 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2062 sum += PetscAbsScalar(*v); v++; 2063 } 2064 v = bmat->a + bmat->i[j]; 2065 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2066 sum += PetscAbsScalar(*v); v++; 2067 } 2068 if (sum > ntemp) ntemp = sum; 2069 } 2070 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2071 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 2072 } 2073 PetscFunctionReturn(0); 2074 } 2075 2076 #undef __FUNCT__ 2077 #define __FUNCT__ "MatTranspose_MPIAIJ" 2078 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2079 { 2080 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2081 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2082 PetscErrorCode ierr; 2083 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 2084 PetscInt cstart = A->cmap->rstart,ncol; 2085 Mat B; 2086 MatScalar *array; 2087 2088 PetscFunctionBegin; 2089 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2090 2091 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2092 ai = Aloc->i; aj = Aloc->j; 2093 bi = Bloc->i; bj = Bloc->j; 2094 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2095 PetscInt *d_nnz,*g_nnz,*o_nnz; 2096 PetscSFNode *oloc; 2097 PETSC_UNUSED PetscSF sf; 2098 2099 ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr); 2100 /* compute d_nnz for preallocation */ 2101 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2102 for (i=0; i<ai[ma]; i++) { 2103 d_nnz[aj[i]]++; 2104 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2105 } 2106 /* compute local off-diagonal contributions */ 2107 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 2108 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2109 /* map those to global */ 2110 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2111 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2112 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2113 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2114 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2115 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2116 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2117 2118 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2119 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2120 ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr); 2121 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2122 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2123 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2124 } else { 2125 B = *matout; 2126 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2127 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2128 } 2129 2130 /* copy over the A part */ 2131 array = Aloc->a; 2132 row = A->rmap->rstart; 2133 for (i=0; i<ma; i++) { 2134 ncol = ai[i+1]-ai[i]; 2135 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2136 row++; 2137 array += ncol; aj += ncol; 2138 } 2139 aj = Aloc->j; 2140 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2141 2142 /* copy over the B part */ 2143 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2144 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2145 array = Bloc->a; 2146 row = A->rmap->rstart; 2147 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2148 cols_tmp = cols; 2149 for (i=0; i<mb; i++) { 2150 ncol = bi[i+1]-bi[i]; 2151 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2152 row++; 2153 array += ncol; cols_tmp += ncol; 2154 } 2155 ierr = PetscFree(cols);CHKERRQ(ierr); 2156 2157 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2158 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2159 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2160 *matout = B; 2161 } else { 2162 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2163 } 2164 PetscFunctionReturn(0); 2165 } 2166 2167 #undef __FUNCT__ 2168 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2169 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2170 { 2171 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2172 Mat a = aij->A,b = aij->B; 2173 PetscErrorCode ierr; 2174 PetscInt s1,s2,s3; 2175 2176 PetscFunctionBegin; 2177 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2178 if (rr) { 2179 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2180 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2181 /* Overlap communication with computation. */ 2182 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2183 } 2184 if (ll) { 2185 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2186 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2187 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2188 } 2189 /* scale the diagonal block */ 2190 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2191 2192 if (rr) { 2193 /* Do a scatter end and then right scale the off-diagonal block */ 2194 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2195 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2196 } 2197 PetscFunctionReturn(0); 2198 } 2199 2200 #undef __FUNCT__ 2201 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2202 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2203 { 2204 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2205 PetscErrorCode ierr; 2206 2207 PetscFunctionBegin; 2208 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2209 PetscFunctionReturn(0); 2210 } 2211 2212 #undef __FUNCT__ 2213 #define __FUNCT__ "MatEqual_MPIAIJ" 2214 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2215 { 2216 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2217 Mat a,b,c,d; 2218 PetscBool flg; 2219 PetscErrorCode ierr; 2220 2221 PetscFunctionBegin; 2222 a = matA->A; b = matA->B; 2223 c = matB->A; d = matB->B; 2224 2225 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2226 if (flg) { 2227 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2228 } 2229 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2230 PetscFunctionReturn(0); 2231 } 2232 2233 #undef __FUNCT__ 2234 #define __FUNCT__ "MatCopy_MPIAIJ" 2235 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2236 { 2237 PetscErrorCode ierr; 2238 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2239 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2240 2241 PetscFunctionBegin; 2242 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2243 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2244 /* because of the column compression in the off-processor part of the matrix a->B, 2245 the number of columns in a->B and b->B may be different, hence we cannot call 2246 the MatCopy() directly on the two parts. If need be, we can provide a more 2247 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2248 then copying the submatrices */ 2249 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2250 } else { 2251 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2252 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2253 } 2254 PetscFunctionReturn(0); 2255 } 2256 2257 #undef __FUNCT__ 2258 #define __FUNCT__ "MatSetUp_MPIAIJ" 2259 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2260 { 2261 PetscErrorCode ierr; 2262 2263 PetscFunctionBegin; 2264 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2265 PetscFunctionReturn(0); 2266 } 2267 2268 #undef __FUNCT__ 2269 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2270 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2271 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2272 { 2273 PetscInt i,m=Y->rmap->N; 2274 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2275 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2276 const PetscInt *xi = x->i,*yi = y->i; 2277 2278 PetscFunctionBegin; 2279 /* Set the number of nonzeros in the new matrix */ 2280 for (i=0; i<m; i++) { 2281 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2282 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2283 nnz[i] = 0; 2284 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2285 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2286 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2287 nnz[i]++; 2288 } 2289 for (; k<nzy; k++) nnz[i]++; 2290 } 2291 PetscFunctionReturn(0); 2292 } 2293 2294 #undef __FUNCT__ 2295 #define __FUNCT__ "MatAXPY_MPIAIJ" 2296 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2297 { 2298 PetscErrorCode ierr; 2299 PetscInt i; 2300 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2301 PetscBLASInt bnz,one=1; 2302 Mat_SeqAIJ *x,*y; 2303 2304 PetscFunctionBegin; 2305 if (str == SAME_NONZERO_PATTERN) { 2306 PetscScalar alpha = a; 2307 x = (Mat_SeqAIJ*)xx->A->data; 2308 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2309 y = (Mat_SeqAIJ*)yy->A->data; 2310 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2311 x = (Mat_SeqAIJ*)xx->B->data; 2312 y = (Mat_SeqAIJ*)yy->B->data; 2313 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2314 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2315 } else if (str == SUBSET_NONZERO_PATTERN) { 2316 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2317 2318 x = (Mat_SeqAIJ*)xx->B->data; 2319 y = (Mat_SeqAIJ*)yy->B->data; 2320 if (y->xtoy && y->XtoY != xx->B) { 2321 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2322 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2323 } 2324 if (!y->xtoy) { /* get xtoy */ 2325 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2326 y->XtoY = xx->B; 2327 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2328 } 2329 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2330 } else { 2331 Mat B; 2332 PetscInt *nnz_d,*nnz_o; 2333 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2334 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2335 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2336 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2337 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2338 ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr); 2339 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2340 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2341 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2342 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2343 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2344 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2345 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2346 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2347 } 2348 PetscFunctionReturn(0); 2349 } 2350 2351 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2352 2353 #undef __FUNCT__ 2354 #define __FUNCT__ "MatConjugate_MPIAIJ" 2355 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2356 { 2357 #if defined(PETSC_USE_COMPLEX) 2358 PetscErrorCode ierr; 2359 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2360 2361 PetscFunctionBegin; 2362 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2363 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2364 #else 2365 PetscFunctionBegin; 2366 #endif 2367 PetscFunctionReturn(0); 2368 } 2369 2370 #undef __FUNCT__ 2371 #define __FUNCT__ "MatRealPart_MPIAIJ" 2372 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2373 { 2374 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2375 PetscErrorCode ierr; 2376 2377 PetscFunctionBegin; 2378 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2379 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2380 PetscFunctionReturn(0); 2381 } 2382 2383 #undef __FUNCT__ 2384 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2385 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2386 { 2387 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2388 PetscErrorCode ierr; 2389 2390 PetscFunctionBegin; 2391 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2392 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2393 PetscFunctionReturn(0); 2394 } 2395 2396 #if defined(PETSC_HAVE_PBGL) 2397 2398 #include <boost/parallel/mpi/bsp_process_group.hpp> 2399 #include <boost/graph/distributed/ilu_default_graph.hpp> 2400 #include <boost/graph/distributed/ilu_0_block.hpp> 2401 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2402 #include <boost/graph/distributed/petsc/interface.hpp> 2403 #include <boost/multi_array.hpp> 2404 #include <boost/parallel/distributed_property_map->hpp> 2405 2406 #undef __FUNCT__ 2407 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2408 /* 2409 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2410 */ 2411 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2412 { 2413 namespace petsc = boost::distributed::petsc; 2414 2415 namespace graph_dist = boost::graph::distributed; 2416 using boost::graph::distributed::ilu_default::process_group_type; 2417 using boost::graph::ilu_permuted; 2418 2419 PetscBool row_identity, col_identity; 2420 PetscContainer c; 2421 PetscInt m, n, M, N; 2422 PetscErrorCode ierr; 2423 2424 PetscFunctionBegin; 2425 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2426 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2427 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2428 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2429 2430 process_group_type pg; 2431 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2432 lgraph_type *lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2433 lgraph_type& level_graph = *lgraph_p; 2434 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2435 2436 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2437 ilu_permuted(level_graph); 2438 2439 /* put together the new matrix */ 2440 ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr); 2441 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2442 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2443 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2444 ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 2445 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2446 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2447 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2448 2449 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c); 2450 ierr = PetscContainerSetPointer(c, lgraph_p); 2451 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2452 ierr = PetscContainerDestroy(&c); 2453 PetscFunctionReturn(0); 2454 } 2455 2456 #undef __FUNCT__ 2457 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2458 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2459 { 2460 PetscFunctionBegin; 2461 PetscFunctionReturn(0); 2462 } 2463 2464 #undef __FUNCT__ 2465 #define __FUNCT__ "MatSolve_MPIAIJ" 2466 /* 2467 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2468 */ 2469 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2470 { 2471 namespace graph_dist = boost::graph::distributed; 2472 2473 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2474 lgraph_type *lgraph_p; 2475 PetscContainer c; 2476 PetscErrorCode ierr; 2477 2478 PetscFunctionBegin; 2479 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr); 2480 ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr); 2481 ierr = VecCopy(b, x);CHKERRQ(ierr); 2482 2483 PetscScalar *array_x; 2484 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2485 PetscInt sx; 2486 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2487 2488 PetscScalar *array_b; 2489 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2490 PetscInt sb; 2491 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2492 2493 lgraph_type& level_graph = *lgraph_p; 2494 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2495 2496 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2497 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]); 2498 array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]); 2499 2500 typedef boost::iterator_property_map<array_ref_type::iterator, 2501 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2502 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)); 2503 gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2504 2505 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2506 PetscFunctionReturn(0); 2507 } 2508 #endif 2509 2510 #undef __FUNCT__ 2511 #define __FUNCT__ "MatDestroy_MatRedundant" 2512 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2513 { 2514 PetscErrorCode ierr; 2515 Mat_Redundant *redund; 2516 PetscInt i; 2517 PetscMPIInt size; 2518 2519 PetscFunctionBegin; 2520 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2521 if (size == 1) { 2522 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2523 redund = a->redundant; 2524 } else { 2525 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2526 redund = a->redundant; 2527 } 2528 if (redund){ 2529 if (redund->matseq) { /* '-new' option */ 2530 ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr); 2531 ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr); 2532 ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr); 2533 ierr = PetscFree(redund->matseq);CHKERRQ(ierr); 2534 } else { 2535 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2536 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2537 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2538 for (i=0; i<redund->nrecvs; i++) { 2539 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2540 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2541 } 2542 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2543 2544 if (redund->psubcomm) { 2545 ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr); 2546 } 2547 } 2548 2549 ierr = redund->Destroy(A);CHKERRQ(ierr); 2550 ierr = PetscFree(redund);CHKERRQ(ierr); 2551 } 2552 PetscFunctionReturn(0); 2553 } 2554 2555 #undef __FUNCT__ 2556 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_psubcomm" 2557 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_psubcomm(Mat mat,PetscInt nsubcomm,PetscSubcomm psubcomm,MatReuse reuse,Mat *matredundant) 2558 { 2559 PetscMPIInt rank,size; 2560 MPI_Comm comm,subcomm=psubcomm->comm; 2561 PetscErrorCode ierr; 2562 PetscInt nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N; 2563 PetscMPIInt *send_rank= NULL,*recv_rank=NULL,subrank,subsize; 2564 PetscInt *rowrange = mat->rmap->range; 2565 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2566 Mat A = aij->A,B=aij->B,C=*matredundant; 2567 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2568 PetscScalar *sbuf_a; 2569 PetscInt nzlocal=a->nz+b->nz; 2570 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2571 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray; 2572 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2573 MatScalar *aworkA,*aworkB; 2574 PetscScalar *vals; 2575 PetscMPIInt tag1,tag2,tag3,imdex; 2576 MPI_Request *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL; 2577 MPI_Request *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL; 2578 MPI_Status recv_status,*send_status; 2579 PetscInt *sbuf_nz=NULL,*rbuf_nz=NULL,count; 2580 PetscInt **rbuf_j=NULL; 2581 PetscScalar **rbuf_a=NULL; 2582 Mat_Redundant *redund =NULL; 2583 PetscBool flg=PETSC_FALSE; 2584 2585 PetscFunctionBegin; 2586 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2587 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2588 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2589 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2590 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2591 2592 /* ---------- new imples: use MatGetSubMatrices() ------------*/ 2593 ierr = PetscOptionsGetBool(NULL,"-new",&flg,NULL);CHKERRQ(ierr); 2594 if (flg) { 2595 Mat *matseq; 2596 IS isrow,iscol; 2597 PetscInt mloc_sub,rstart,rend; 2598 2599 if (reuse == MAT_INITIAL_MATRIX) { 2600 /* create a local sequential matrix matseq[0] */ 2601 mloc_sub = PETSC_DECIDE; 2602 ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr); 2603 ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr); 2604 rstart = rend - mloc_sub; 2605 /* printf("[%d] Use MatGetSubMatrices()...rows %d - %d, mloc_sub %d\n",rank,rstart,rend,mloc_sub); */ 2606 ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr); 2607 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr); 2608 } else { /* reuse == MAT_REUSE_MATRIX */ 2609 if (subsize == 1) { 2610 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2611 redund = c->redundant; 2612 } else { 2613 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2614 redund = c->redundant; 2615 } 2616 2617 isrow = redund->isrow; 2618 iscol = redund->iscol; 2619 matseq = redund->matseq; 2620 } 2621 2622 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr); 2623 /* 2624 if (rank==0) { 2625 printf("[%d] matsub:\n",rank); 2626 ierr = MatView(matseq[0],PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 2627 } 2628 ierr = MPI_Barrier(comm);CHKERRQ(ierr); 2629 */ 2630 2631 /* Create matredundant by concatenating matseq[0] from processors in this subcomm */ 2632 /* 2633 if (reuse == MAT_REUSE_MATRIX) { 2634 if (!rank) printf("matredundant:\n"); 2635 ierr = MatView(*matredundant,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 2636 } 2637 */ 2638 ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr); 2639 /* 2640 if (nsubcomm == 1) { 2641 if (!rank) printf( "matredundant\n"); 2642 ierr = MatView(*matredundant,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 2643 } */ 2644 2645 if (reuse == MAT_INITIAL_MATRIX) { 2646 /* create a supporting struct and attach it to C for reuse */ 2647 ierr = PetscNewLog(*matredundant,Mat_Redundant,&redund);CHKERRQ(ierr); 2648 if (subsize == 1) { 2649 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2650 c->redundant = redund; 2651 } else { 2652 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2653 c->redundant = redund; 2654 } 2655 2656 redund->isrow = isrow; 2657 redund->iscol = iscol; 2658 redund->matseq = matseq; 2659 redund->psubcomm = NULL; 2660 2661 redund->Destroy = (*matredundant)->ops->destroy; 2662 (*matredundant)->ops->destroy = MatDestroy_MatRedundant; 2663 } 2664 PetscFunctionReturn(0); 2665 } 2666 /* ----------------------------------------------------*/ 2667 2668 if (reuse == MAT_REUSE_MATRIX) { 2669 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2670 if (subsize == 1) { 2671 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2672 redund = c->redundant; 2673 } else { 2674 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2675 redund = c->redundant; 2676 } 2677 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2678 2679 nsends = redund->nsends; 2680 nrecvs = redund->nrecvs; 2681 send_rank = redund->send_rank; 2682 recv_rank = redund->recv_rank; 2683 sbuf_nz = redund->sbuf_nz; 2684 rbuf_nz = redund->rbuf_nz; 2685 sbuf_j = redund->sbuf_j; 2686 sbuf_a = redund->sbuf_a; 2687 rbuf_j = redund->rbuf_j; 2688 rbuf_a = redund->rbuf_a; 2689 } 2690 2691 if (reuse == MAT_INITIAL_MATRIX) { 2692 PetscInt nleftover,np_subcomm; 2693 2694 /* get the destination processors' id send_rank, nsends and nrecvs */ 2695 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr); 2696 2697 np_subcomm = size/nsubcomm; 2698 nleftover = size - nsubcomm*np_subcomm; 2699 2700 nsends = 0; nrecvs = 0; 2701 if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) { 2702 /* -------------------------------------------*/ 2703 for (i=0; i<size; i++) { 2704 if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */ 2705 send_rank[nsends] = i; nsends++; 2706 recv_rank[nrecvs++] = i; 2707 /* printf("[%d] send to and recv from [%d]\n",rank,i); */ 2708 } 2709 } 2710 if (rank >= size - nleftover) { /* this proc is a leftover processor */ 2711 i = size-nleftover-1; 2712 j = 0; 2713 while (j < nsubcomm - nleftover) { 2714 send_rank[nsends++] = i; 2715 i--; j++; 2716 /* printf("[%d] send to [%d]\n",rank,i); */ 2717 } 2718 } 2719 2720 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 2721 for (i=0; i<nleftover; i++) { 2722 recv_rank[nrecvs++] = size-nleftover+i; 2723 /* printf("[%d] recv from [%d]\n",rank,i); */ 2724 } 2725 } 2726 } else if (psubcomm->type == PETSC_SUBCOMM_CONTIGUOUS) { 2727 /* --------------------------------------------------*/ 2728 PetscInt color,subcommstart; 2729 subcommstart=0; 2730 for (color=0; color<nsubcomm; color++) { 2731 if (psubcomm->color != color) { 2732 for (i=0; i<psubcomm->subsize[color]; i++) { 2733 if (subrank == i) { /* my_subrank == other's subrank */ 2734 send_rank[nsends++] = subcommstart+i; 2735 recv_rank[nrecvs++] = subcommstart+i; 2736 /* printf("[%d] send to and recv from [%d]\n",rank,subcommstart+i); */ 2737 } 2738 } 2739 } 2740 subcommstart += psubcomm->subsize[color]; 2741 } 2742 if (nleftover && subrank == size/nsubcomm) { /* this proc is a leftover proc, send to subcomm that does not have leftover proc */ 2743 subcommstart=0; 2744 for (color=0; color<nsubcomm; color++) { 2745 subcommstart += psubcomm->subsize[color]; 2746 if (psubcomm->color == color) continue; 2747 if (psubcomm->subsize[color] == size/nsubcomm) { /* subcomm does not have leftover proc */ 2748 send_rank[nsends++] = subcommstart -1; /* send to the last proc of subcomm[color] */ 2749 /* printf("[%d] leftover send to [%d] \n",rank,subcommstart -1); */ 2750 } 2751 } 2752 } 2753 2754 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 2755 subcommstart=0; 2756 for (color=0; color<nsubcomm; color++) { 2757 subcommstart += psubcomm->subsize[color]; 2758 if (psubcomm->subsize[color] > size/nsubcomm) { /* subcomm has leftover proc */ 2759 recv_rank[nrecvs++] = subcommstart -1; /* recv from the last proc of subcomm[color] */ 2760 /* printf("[%d] recv from [%d]\n",rank,subcommstart -1); */ 2761 } 2762 } 2763 } 2764 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for PetscSubcomm type %D",psubcomm->type); 2765 2766 /* allocate sbuf_j, sbuf_a */ 2767 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2768 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2769 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2770 /* 2771 ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr); 2772 ierr = PetscSynchronizedFlush(comm);CHKERRQ(ierr); 2773 */ 2774 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2775 2776 /* copy mat's local entries into the buffers */ 2777 if (reuse == MAT_INITIAL_MATRIX) { 2778 rownz_max = 0; 2779 rptr = sbuf_j; 2780 cols = sbuf_j + rend-rstart + 1; 2781 vals = sbuf_a; 2782 rptr[0] = 0; 2783 for (i=0; i<rend-rstart; i++) { 2784 row = i + rstart; 2785 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2786 ncols = nzA + nzB; 2787 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2788 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2789 /* load the column indices for this row into cols */ 2790 lwrite = 0; 2791 for (l=0; l<nzB; l++) { 2792 if ((ctmp = bmap[cworkB[l]]) < cstart) { 2793 vals[lwrite] = aworkB[l]; 2794 cols[lwrite++] = ctmp; 2795 } 2796 } 2797 for (l=0; l<nzA; l++) { 2798 vals[lwrite] = aworkA[l]; 2799 cols[lwrite++] = cstart + cworkA[l]; 2800 } 2801 for (l=0; l<nzB; l++) { 2802 if ((ctmp = bmap[cworkB[l]]) >= cend) { 2803 vals[lwrite] = aworkB[l]; 2804 cols[lwrite++] = ctmp; 2805 } 2806 } 2807 vals += ncols; 2808 cols += ncols; 2809 rptr[i+1] = rptr[i] + ncols; 2810 if (rownz_max < ncols) rownz_max = ncols; 2811 } 2812 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2813 } else { /* only copy matrix values into sbuf_a */ 2814 rptr = sbuf_j; 2815 vals = sbuf_a; 2816 rptr[0] = 0; 2817 for (i=0; i<rend-rstart; i++) { 2818 row = i + rstart; 2819 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2820 ncols = nzA + nzB; 2821 cworkB = b->j + b->i[i]; 2822 aworkA = a->a + a->i[i]; 2823 aworkB = b->a + b->i[i]; 2824 lwrite = 0; 2825 for (l=0; l<nzB; l++) { 2826 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2827 } 2828 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2829 for (l=0; l<nzB; l++) { 2830 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2831 } 2832 vals += ncols; 2833 rptr[i+1] = rptr[i] + ncols; 2834 } 2835 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2836 2837 /* send nzlocal to others, and recv other's nzlocal */ 2838 /*--------------------------------------------------*/ 2839 if (reuse == MAT_INITIAL_MATRIX) { 2840 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2841 2842 s_waits2 = s_waits3 + nsends; 2843 s_waits1 = s_waits2 + nsends; 2844 r_waits1 = s_waits1 + nsends; 2845 r_waits2 = r_waits1 + nrecvs; 2846 r_waits3 = r_waits2 + nrecvs; 2847 } else { 2848 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2849 2850 r_waits3 = s_waits3 + nsends; 2851 } 2852 2853 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2854 if (reuse == MAT_INITIAL_MATRIX) { 2855 /* get new tags to keep the communication clean */ 2856 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2857 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2858 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2859 2860 /* post receives of other's nzlocal */ 2861 for (i=0; i<nrecvs; i++) { 2862 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2863 } 2864 /* send nzlocal to others */ 2865 for (i=0; i<nsends; i++) { 2866 sbuf_nz[i] = nzlocal; 2867 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2868 } 2869 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2870 count = nrecvs; 2871 while (count) { 2872 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2873 2874 recv_rank[imdex] = recv_status.MPI_SOURCE; 2875 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2876 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2877 2878 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2879 2880 rbuf_nz[imdex] += i + 2; 2881 2882 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2883 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2884 count--; 2885 } 2886 /* wait on sends of nzlocal */ 2887 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2888 /* send mat->i,j to others, and recv from other's */ 2889 /*------------------------------------------------*/ 2890 for (i=0; i<nsends; i++) { 2891 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2892 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2893 } 2894 /* wait on receives of mat->i,j */ 2895 /*------------------------------*/ 2896 count = nrecvs; 2897 while (count) { 2898 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2899 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2900 count--; 2901 } 2902 /* wait on sends of mat->i,j */ 2903 /*---------------------------*/ 2904 if (nsends) { 2905 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2906 } 2907 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2908 2909 /* post receives, send and receive mat->a */ 2910 /*----------------------------------------*/ 2911 for (imdex=0; imdex<nrecvs; imdex++) { 2912 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2913 } 2914 for (i=0; i<nsends; i++) { 2915 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2916 } 2917 count = nrecvs; 2918 while (count) { 2919 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2920 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2921 count--; 2922 } 2923 if (nsends) { 2924 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2925 } 2926 2927 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2928 2929 /* create redundant matrix */ 2930 /*-------------------------*/ 2931 if (reuse == MAT_INITIAL_MATRIX) { 2932 const PetscInt *range; 2933 PetscInt rstart_sub,rend_sub,mloc_sub; 2934 2935 /* compute rownz_max for preallocation */ 2936 for (imdex=0; imdex<nrecvs; imdex++) { 2937 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2938 rptr = rbuf_j[imdex]; 2939 for (i=0; i<j; i++) { 2940 ncols = rptr[i+1] - rptr[i]; 2941 if (rownz_max < ncols) rownz_max = ncols; 2942 } 2943 } 2944 2945 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2946 2947 /* get local size of redundant matrix 2948 - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED, works for other types, but may not efficient! */ 2949 ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr); 2950 rstart_sub = range[nsubcomm*subrank]; 2951 if (subrank+1 < subsize) { /* not the last proc in subcomm */ 2952 rend_sub = range[nsubcomm*(subrank+1)]; 2953 } else { 2954 rend_sub = mat->rmap->N; 2955 } 2956 mloc_sub = rend_sub - rstart_sub; 2957 2958 if (M == N) { 2959 ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2960 } else { /* non-square matrix */ 2961 ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr); 2962 } 2963 ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr); 2964 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2965 ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr); 2966 ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr); 2967 } else { 2968 C = *matredundant; 2969 } 2970 2971 /* insert local matrix entries */ 2972 rptr = sbuf_j; 2973 cols = sbuf_j + rend-rstart + 1; 2974 vals = sbuf_a; 2975 for (i=0; i<rend-rstart; i++) { 2976 row = i + rstart; 2977 ncols = rptr[i+1] - rptr[i]; 2978 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2979 vals += ncols; 2980 cols += ncols; 2981 } 2982 /* insert received matrix entries */ 2983 for (imdex=0; imdex<nrecvs; imdex++) { 2984 rstart = rowrange[recv_rank[imdex]]; 2985 rend = rowrange[recv_rank[imdex]+1]; 2986 /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */ 2987 rptr = rbuf_j[imdex]; 2988 cols = rbuf_j[imdex] + rend-rstart + 1; 2989 vals = rbuf_a[imdex]; 2990 for (i=0; i<rend-rstart; i++) { 2991 row = i + rstart; 2992 ncols = rptr[i+1] - rptr[i]; 2993 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2994 vals += ncols; 2995 cols += ncols; 2996 } 2997 } 2998 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2999 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3000 3001 if (reuse == MAT_INITIAL_MATRIX) { 3002 *matredundant = C; 3003 3004 /* create a supporting struct and attach it to C for reuse */ 3005 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 3006 if (subsize == 1) { 3007 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 3008 c->redundant = redund; 3009 } else { 3010 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 3011 c->redundant = redund; 3012 } 3013 3014 redund->nzlocal = nzlocal; 3015 redund->nsends = nsends; 3016 redund->nrecvs = nrecvs; 3017 redund->send_rank = send_rank; 3018 redund->recv_rank = recv_rank; 3019 redund->sbuf_nz = sbuf_nz; 3020 redund->rbuf_nz = rbuf_nz; 3021 redund->sbuf_j = sbuf_j; 3022 redund->sbuf_a = sbuf_a; 3023 redund->rbuf_j = rbuf_j; 3024 redund->rbuf_a = rbuf_a; 3025 redund->psubcomm = NULL; 3026 3027 redund->Destroy = C->ops->destroy; 3028 C->ops->destroy = MatDestroy_MatRedundant; 3029 } 3030 PetscFunctionReturn(0); 3031 } 3032 3033 #undef __FUNCT__ 3034 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 3035 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 3036 { 3037 PetscErrorCode ierr; 3038 3039 PetscFunctionBegin; 3040 if (subcomm == MPI_COMM_NULL || subcomm == PETSC_COMM_SELF) { /* create psubcomm */ 3041 MPI_Comm comm; 3042 PetscSubcomm psubcomm; 3043 PetscMPIInt size,subsize; 3044 3045 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3046 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3047 ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr); 3048 ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr); 3049 ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_INTERLACED);CHKERRQ(ierr); 3050 ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr); 3051 3052 ierr = MatGetRedundantMatrix_MPIAIJ_psubcomm(mat,nsubcomm,psubcomm,reuse,matredundant);CHKERRQ(ierr); 3053 3054 /* free psubcomm in MatDestroy_MatRedundant() */ 3055 ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr); 3056 if (subsize == 1) { 3057 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 3058 c->redundant->psubcomm = psubcomm; 3059 } else { 3060 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 3061 c->redundant->psubcomm = psubcomm ; 3062 } 3063 } else { 3064 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet"); 3065 } 3066 PetscFunctionReturn(0); 3067 } 3068 3069 #undef __FUNCT__ 3070 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 3071 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 3072 { 3073 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3074 PetscErrorCode ierr; 3075 PetscInt i,*idxb = 0; 3076 PetscScalar *va,*vb; 3077 Vec vtmp; 3078 3079 PetscFunctionBegin; 3080 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 3081 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 3082 if (idx) { 3083 for (i=0; i<A->rmap->n; i++) { 3084 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 3085 } 3086 } 3087 3088 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 3089 if (idx) { 3090 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 3091 } 3092 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 3093 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 3094 3095 for (i=0; i<A->rmap->n; i++) { 3096 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 3097 va[i] = vb[i]; 3098 if (idx) idx[i] = a->garray[idxb[i]]; 3099 } 3100 } 3101 3102 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 3103 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 3104 ierr = PetscFree(idxb);CHKERRQ(ierr); 3105 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 3106 PetscFunctionReturn(0); 3107 } 3108 3109 #undef __FUNCT__ 3110 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 3111 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 3112 { 3113 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3114 PetscErrorCode ierr; 3115 PetscInt i,*idxb = 0; 3116 PetscScalar *va,*vb; 3117 Vec vtmp; 3118 3119 PetscFunctionBegin; 3120 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 3121 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 3122 if (idx) { 3123 for (i=0; i<A->cmap->n; i++) { 3124 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 3125 } 3126 } 3127 3128 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 3129 if (idx) { 3130 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 3131 } 3132 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 3133 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 3134 3135 for (i=0; i<A->rmap->n; i++) { 3136 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 3137 va[i] = vb[i]; 3138 if (idx) idx[i] = a->garray[idxb[i]]; 3139 } 3140 } 3141 3142 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 3143 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 3144 ierr = PetscFree(idxb);CHKERRQ(ierr); 3145 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 3146 PetscFunctionReturn(0); 3147 } 3148 3149 #undef __FUNCT__ 3150 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 3151 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 3152 { 3153 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 3154 PetscInt n = A->rmap->n; 3155 PetscInt cstart = A->cmap->rstart; 3156 PetscInt *cmap = mat->garray; 3157 PetscInt *diagIdx, *offdiagIdx; 3158 Vec diagV, offdiagV; 3159 PetscScalar *a, *diagA, *offdiagA; 3160 PetscInt r; 3161 PetscErrorCode ierr; 3162 3163 PetscFunctionBegin; 3164 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 3165 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 3166 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 3167 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 3168 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 3169 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 3170 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 3171 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3172 for (r = 0; r < n; ++r) { 3173 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 3174 a[r] = diagA[r]; 3175 idx[r] = cstart + diagIdx[r]; 3176 } else { 3177 a[r] = offdiagA[r]; 3178 idx[r] = cmap[offdiagIdx[r]]; 3179 } 3180 } 3181 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3182 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3183 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3184 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3185 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3186 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3187 PetscFunctionReturn(0); 3188 } 3189 3190 #undef __FUNCT__ 3191 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 3192 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 3193 { 3194 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 3195 PetscInt n = A->rmap->n; 3196 PetscInt cstart = A->cmap->rstart; 3197 PetscInt *cmap = mat->garray; 3198 PetscInt *diagIdx, *offdiagIdx; 3199 Vec diagV, offdiagV; 3200 PetscScalar *a, *diagA, *offdiagA; 3201 PetscInt r; 3202 PetscErrorCode ierr; 3203 3204 PetscFunctionBegin; 3205 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 3206 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 3207 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 3208 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 3209 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 3210 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 3211 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 3212 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3213 for (r = 0; r < n; ++r) { 3214 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 3215 a[r] = diagA[r]; 3216 idx[r] = cstart + diagIdx[r]; 3217 } else { 3218 a[r] = offdiagA[r]; 3219 idx[r] = cmap[offdiagIdx[r]]; 3220 } 3221 } 3222 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 3223 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 3224 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 3225 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 3226 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 3227 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 3228 PetscFunctionReturn(0); 3229 } 3230 3231 #undef __FUNCT__ 3232 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 3233 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 3234 { 3235 PetscErrorCode ierr; 3236 Mat *dummy; 3237 3238 PetscFunctionBegin; 3239 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 3240 *newmat = *dummy; 3241 ierr = PetscFree(dummy);CHKERRQ(ierr); 3242 PetscFunctionReturn(0); 3243 } 3244 3245 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 3246 3247 #undef __FUNCT__ 3248 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3249 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3250 { 3251 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3252 PetscErrorCode ierr; 3253 3254 PetscFunctionBegin; 3255 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3256 PetscFunctionReturn(0); 3257 } 3258 3259 #undef __FUNCT__ 3260 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3261 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3262 { 3263 PetscErrorCode ierr; 3264 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3265 3266 PetscFunctionBegin; 3267 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3268 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3269 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3270 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3271 PetscFunctionReturn(0); 3272 } 3273 3274 /* -------------------------------------------------------------------*/ 3275 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3276 MatGetRow_MPIAIJ, 3277 MatRestoreRow_MPIAIJ, 3278 MatMult_MPIAIJ, 3279 /* 4*/ MatMultAdd_MPIAIJ, 3280 MatMultTranspose_MPIAIJ, 3281 MatMultTransposeAdd_MPIAIJ, 3282 #if defined(PETSC_HAVE_PBGL) 3283 MatSolve_MPIAIJ, 3284 #else 3285 0, 3286 #endif 3287 0, 3288 0, 3289 /*10*/ 0, 3290 0, 3291 0, 3292 MatSOR_MPIAIJ, 3293 MatTranspose_MPIAIJ, 3294 /*15*/ MatGetInfo_MPIAIJ, 3295 MatEqual_MPIAIJ, 3296 MatGetDiagonal_MPIAIJ, 3297 MatDiagonalScale_MPIAIJ, 3298 MatNorm_MPIAIJ, 3299 /*20*/ MatAssemblyBegin_MPIAIJ, 3300 MatAssemblyEnd_MPIAIJ, 3301 MatSetOption_MPIAIJ, 3302 MatZeroEntries_MPIAIJ, 3303 /*24*/ MatZeroRows_MPIAIJ, 3304 0, 3305 #if defined(PETSC_HAVE_PBGL) 3306 0, 3307 #else 3308 0, 3309 #endif 3310 0, 3311 0, 3312 /*29*/ MatSetUp_MPIAIJ, 3313 #if defined(PETSC_HAVE_PBGL) 3314 0, 3315 #else 3316 0, 3317 #endif 3318 0, 3319 0, 3320 0, 3321 /*34*/ MatDuplicate_MPIAIJ, 3322 0, 3323 0, 3324 0, 3325 0, 3326 /*39*/ MatAXPY_MPIAIJ, 3327 MatGetSubMatrices_MPIAIJ, 3328 MatIncreaseOverlap_MPIAIJ, 3329 MatGetValues_MPIAIJ, 3330 MatCopy_MPIAIJ, 3331 /*44*/ MatGetRowMax_MPIAIJ, 3332 MatScale_MPIAIJ, 3333 0, 3334 0, 3335 MatZeroRowsColumns_MPIAIJ, 3336 /*49*/ MatSetRandom_MPIAIJ, 3337 0, 3338 0, 3339 0, 3340 0, 3341 /*54*/ MatFDColoringCreate_MPIAIJ, 3342 0, 3343 MatSetUnfactored_MPIAIJ, 3344 MatPermute_MPIAIJ, 3345 0, 3346 /*59*/ MatGetSubMatrix_MPIAIJ, 3347 MatDestroy_MPIAIJ, 3348 MatView_MPIAIJ, 3349 0, 3350 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 3351 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 3352 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 3353 0, 3354 0, 3355 0, 3356 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3357 MatGetRowMinAbs_MPIAIJ, 3358 0, 3359 MatSetColoring_MPIAIJ, 3360 0, 3361 MatSetValuesAdifor_MPIAIJ, 3362 /*75*/ MatFDColoringApply_AIJ, 3363 0, 3364 0, 3365 0, 3366 MatFindZeroDiagonals_MPIAIJ, 3367 /*80*/ 0, 3368 0, 3369 0, 3370 /*83*/ MatLoad_MPIAIJ, 3371 0, 3372 0, 3373 0, 3374 0, 3375 0, 3376 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3377 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3378 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3379 MatPtAP_MPIAIJ_MPIAIJ, 3380 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3381 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 3382 0, 3383 0, 3384 0, 3385 0, 3386 /*99*/ 0, 3387 0, 3388 0, 3389 MatConjugate_MPIAIJ, 3390 0, 3391 /*104*/MatSetValuesRow_MPIAIJ, 3392 MatRealPart_MPIAIJ, 3393 MatImaginaryPart_MPIAIJ, 3394 0, 3395 0, 3396 /*109*/0, 3397 MatGetRedundantMatrix_MPIAIJ, 3398 MatGetRowMin_MPIAIJ, 3399 0, 3400 0, 3401 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3402 0, 3403 0, 3404 0, 3405 0, 3406 /*119*/0, 3407 0, 3408 0, 3409 0, 3410 MatGetMultiProcBlock_MPIAIJ, 3411 /*124*/MatFindNonzeroRows_MPIAIJ, 3412 MatGetColumnNorms_MPIAIJ, 3413 MatInvertBlockDiagonal_MPIAIJ, 3414 0, 3415 MatGetSubMatricesParallel_MPIAIJ, 3416 /*129*/0, 3417 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3418 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3419 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3420 0, 3421 /*134*/0, 3422 0, 3423 0, 3424 0, 3425 0, 3426 /*139*/0, 3427 0 3428 }; 3429 3430 /* ----------------------------------------------------------------------------------------*/ 3431 3432 #undef __FUNCT__ 3433 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3434 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3435 { 3436 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3437 PetscErrorCode ierr; 3438 3439 PetscFunctionBegin; 3440 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3441 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3442 PetscFunctionReturn(0); 3443 } 3444 3445 #undef __FUNCT__ 3446 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3447 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3448 { 3449 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3450 PetscErrorCode ierr; 3451 3452 PetscFunctionBegin; 3453 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3454 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3455 PetscFunctionReturn(0); 3456 } 3457 3458 #undef __FUNCT__ 3459 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3460 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3461 { 3462 Mat_MPIAIJ *b; 3463 PetscErrorCode ierr; 3464 3465 PetscFunctionBegin; 3466 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3467 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3468 b = (Mat_MPIAIJ*)B->data; 3469 3470 if (!B->preallocated) { 3471 /* Explicitly create 2 MATSEQAIJ matrices. */ 3472 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3473 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3474 ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3475 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3476 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3477 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3478 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3479 ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3480 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3481 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3482 } 3483 3484 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3485 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3486 B->preallocated = PETSC_TRUE; 3487 PetscFunctionReturn(0); 3488 } 3489 3490 #undef __FUNCT__ 3491 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3492 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3493 { 3494 Mat mat; 3495 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3496 PetscErrorCode ierr; 3497 3498 PetscFunctionBegin; 3499 *newmat = 0; 3500 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3501 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3502 ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr); 3503 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3504 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3505 a = (Mat_MPIAIJ*)mat->data; 3506 3507 mat->factortype = matin->factortype; 3508 mat->rmap->bs = matin->rmap->bs; 3509 mat->cmap->bs = matin->cmap->bs; 3510 mat->assembled = PETSC_TRUE; 3511 mat->insertmode = NOT_SET_VALUES; 3512 mat->preallocated = PETSC_TRUE; 3513 3514 a->size = oldmat->size; 3515 a->rank = oldmat->rank; 3516 a->donotstash = oldmat->donotstash; 3517 a->roworiented = oldmat->roworiented; 3518 a->rowindices = 0; 3519 a->rowvalues = 0; 3520 a->getrowactive = PETSC_FALSE; 3521 3522 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3523 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3524 3525 if (oldmat->colmap) { 3526 #if defined(PETSC_USE_CTABLE) 3527 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3528 #else 3529 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3530 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3531 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3532 #endif 3533 } else a->colmap = 0; 3534 if (oldmat->garray) { 3535 PetscInt len; 3536 len = oldmat->B->cmap->n; 3537 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3538 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3539 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3540 } else a->garray = 0; 3541 3542 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3543 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3544 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3545 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3546 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3547 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3548 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3549 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3550 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3551 *newmat = mat; 3552 PetscFunctionReturn(0); 3553 } 3554 3555 3556 3557 #undef __FUNCT__ 3558 #define __FUNCT__ "MatLoad_MPIAIJ" 3559 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3560 { 3561 PetscScalar *vals,*svals; 3562 MPI_Comm comm; 3563 PetscErrorCode ierr; 3564 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3565 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3566 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3567 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 3568 PetscInt cend,cstart,n,*rowners,sizesset=1; 3569 int fd; 3570 PetscInt bs = 1; 3571 3572 PetscFunctionBegin; 3573 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3574 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3575 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3576 if (!rank) { 3577 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3578 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3579 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3580 } 3581 3582 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3583 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3584 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3585 3586 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3587 3588 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3589 M = header[1]; N = header[2]; 3590 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3591 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3592 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3593 3594 /* If global sizes are set, check if they are consistent with that given in the file */ 3595 if (sizesset) { 3596 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3597 } 3598 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3599 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3600 3601 /* determine ownership of all (block) rows */ 3602 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3603 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3604 else m = newMat->rmap->n; /* Set by user */ 3605 3606 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3607 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3608 3609 /* First process needs enough room for process with most rows */ 3610 if (!rank) { 3611 mmax = rowners[1]; 3612 for (i=2; i<=size; i++) { 3613 mmax = PetscMax(mmax, rowners[i]); 3614 } 3615 } else mmax = -1; /* unused, but compilers complain */ 3616 3617 rowners[0] = 0; 3618 for (i=2; i<=size; i++) { 3619 rowners[i] += rowners[i-1]; 3620 } 3621 rstart = rowners[rank]; 3622 rend = rowners[rank+1]; 3623 3624 /* distribute row lengths to all processors */ 3625 ierr = PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);CHKERRQ(ierr); 3626 if (!rank) { 3627 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3628 ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3629 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3630 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3631 for (j=0; j<m; j++) { 3632 procsnz[0] += ourlens[j]; 3633 } 3634 for (i=1; i<size; i++) { 3635 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3636 /* calculate the number of nonzeros on each processor */ 3637 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3638 procsnz[i] += rowlengths[j]; 3639 } 3640 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3641 } 3642 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3643 } else { 3644 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3645 } 3646 3647 if (!rank) { 3648 /* determine max buffer needed and allocate it */ 3649 maxnz = 0; 3650 for (i=0; i<size; i++) { 3651 maxnz = PetscMax(maxnz,procsnz[i]); 3652 } 3653 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3654 3655 /* read in my part of the matrix column indices */ 3656 nz = procsnz[0]; 3657 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3658 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3659 3660 /* read in every one elses and ship off */ 3661 for (i=1; i<size; i++) { 3662 nz = procsnz[i]; 3663 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3664 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3665 } 3666 ierr = PetscFree(cols);CHKERRQ(ierr); 3667 } else { 3668 /* determine buffer space needed for message */ 3669 nz = 0; 3670 for (i=0; i<m; i++) { 3671 nz += ourlens[i]; 3672 } 3673 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3674 3675 /* receive message of column indices*/ 3676 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3677 } 3678 3679 /* determine column ownership if matrix is not square */ 3680 if (N != M) { 3681 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3682 else n = newMat->cmap->n; 3683 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3684 cstart = cend - n; 3685 } else { 3686 cstart = rstart; 3687 cend = rend; 3688 n = cend - cstart; 3689 } 3690 3691 /* loop over local rows, determining number of off diagonal entries */ 3692 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3693 jj = 0; 3694 for (i=0; i<m; i++) { 3695 for (j=0; j<ourlens[i]; j++) { 3696 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3697 jj++; 3698 } 3699 } 3700 3701 for (i=0; i<m; i++) { 3702 ourlens[i] -= offlens[i]; 3703 } 3704 if (!sizesset) { 3705 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3706 } 3707 3708 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3709 3710 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3711 3712 for (i=0; i<m; i++) { 3713 ourlens[i] += offlens[i]; 3714 } 3715 3716 if (!rank) { 3717 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3718 3719 /* read in my part of the matrix numerical values */ 3720 nz = procsnz[0]; 3721 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3722 3723 /* insert into matrix */ 3724 jj = rstart; 3725 smycols = mycols; 3726 svals = vals; 3727 for (i=0; i<m; i++) { 3728 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3729 smycols += ourlens[i]; 3730 svals += ourlens[i]; 3731 jj++; 3732 } 3733 3734 /* read in other processors and ship out */ 3735 for (i=1; i<size; i++) { 3736 nz = procsnz[i]; 3737 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3738 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3739 } 3740 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3741 } else { 3742 /* receive numeric values */ 3743 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3744 3745 /* receive message of values*/ 3746 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3747 3748 /* insert into matrix */ 3749 jj = rstart; 3750 smycols = mycols; 3751 svals = vals; 3752 for (i=0; i<m; i++) { 3753 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3754 smycols += ourlens[i]; 3755 svals += ourlens[i]; 3756 jj++; 3757 } 3758 } 3759 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3760 ierr = PetscFree(vals);CHKERRQ(ierr); 3761 ierr = PetscFree(mycols);CHKERRQ(ierr); 3762 ierr = PetscFree(rowners);CHKERRQ(ierr); 3763 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3764 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3765 PetscFunctionReturn(0); 3766 } 3767 3768 #undef __FUNCT__ 3769 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3770 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3771 { 3772 PetscErrorCode ierr; 3773 IS iscol_local; 3774 PetscInt csize; 3775 3776 PetscFunctionBegin; 3777 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3778 if (call == MAT_REUSE_MATRIX) { 3779 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3780 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3781 } else { 3782 PetscInt cbs; 3783 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3784 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3785 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3786 } 3787 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3788 if (call == MAT_INITIAL_MATRIX) { 3789 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3790 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3791 } 3792 PetscFunctionReturn(0); 3793 } 3794 3795 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3796 #undef __FUNCT__ 3797 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3798 /* 3799 Not great since it makes two copies of the submatrix, first an SeqAIJ 3800 in local and then by concatenating the local matrices the end result. 3801 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3802 3803 Note: This requires a sequential iscol with all indices. 3804 */ 3805 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3806 { 3807 PetscErrorCode ierr; 3808 PetscMPIInt rank,size; 3809 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3810 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3811 PetscBool allcolumns, colflag; 3812 Mat M,Mreuse; 3813 MatScalar *vwork,*aa; 3814 MPI_Comm comm; 3815 Mat_SeqAIJ *aij; 3816 3817 PetscFunctionBegin; 3818 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3819 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3820 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3821 3822 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3823 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3824 if (colflag && ncol == mat->cmap->N) { 3825 allcolumns = PETSC_TRUE; 3826 } else { 3827 allcolumns = PETSC_FALSE; 3828 } 3829 if (call == MAT_REUSE_MATRIX) { 3830 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3831 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3832 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3833 } else { 3834 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3835 } 3836 3837 /* 3838 m - number of local rows 3839 n - number of columns (same on all processors) 3840 rstart - first row in new global matrix generated 3841 */ 3842 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3843 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3844 if (call == MAT_INITIAL_MATRIX) { 3845 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3846 ii = aij->i; 3847 jj = aij->j; 3848 3849 /* 3850 Determine the number of non-zeros in the diagonal and off-diagonal 3851 portions of the matrix in order to do correct preallocation 3852 */ 3853 3854 /* first get start and end of "diagonal" columns */ 3855 if (csize == PETSC_DECIDE) { 3856 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3857 if (mglobal == n) { /* square matrix */ 3858 nlocal = m; 3859 } else { 3860 nlocal = n/size + ((n % size) > rank); 3861 } 3862 } else { 3863 nlocal = csize; 3864 } 3865 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3866 rstart = rend - nlocal; 3867 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3868 3869 /* next, compute all the lengths */ 3870 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3871 olens = dlens + m; 3872 for (i=0; i<m; i++) { 3873 jend = ii[i+1] - ii[i]; 3874 olen = 0; 3875 dlen = 0; 3876 for (j=0; j<jend; j++) { 3877 if (*jj < rstart || *jj >= rend) olen++; 3878 else dlen++; 3879 jj++; 3880 } 3881 olens[i] = olen; 3882 dlens[i] = dlen; 3883 } 3884 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3885 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3886 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3887 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3888 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3889 ierr = PetscFree(dlens);CHKERRQ(ierr); 3890 } else { 3891 PetscInt ml,nl; 3892 3893 M = *newmat; 3894 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3895 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3896 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3897 /* 3898 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3899 rather than the slower MatSetValues(). 3900 */ 3901 M->was_assembled = PETSC_TRUE; 3902 M->assembled = PETSC_FALSE; 3903 } 3904 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3905 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3906 ii = aij->i; 3907 jj = aij->j; 3908 aa = aij->a; 3909 for (i=0; i<m; i++) { 3910 row = rstart + i; 3911 nz = ii[i+1] - ii[i]; 3912 cwork = jj; jj += nz; 3913 vwork = aa; aa += nz; 3914 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3915 } 3916 3917 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3918 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3919 *newmat = M; 3920 3921 /* save submatrix used in processor for next request */ 3922 if (call == MAT_INITIAL_MATRIX) { 3923 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3924 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3925 } 3926 PetscFunctionReturn(0); 3927 } 3928 3929 #undef __FUNCT__ 3930 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3931 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3932 { 3933 PetscInt m,cstart, cend,j,nnz,i,d; 3934 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3935 const PetscInt *JJ; 3936 PetscScalar *values; 3937 PetscErrorCode ierr; 3938 3939 PetscFunctionBegin; 3940 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3941 3942 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3943 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3944 m = B->rmap->n; 3945 cstart = B->cmap->rstart; 3946 cend = B->cmap->rend; 3947 rstart = B->rmap->rstart; 3948 3949 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3950 3951 #if defined(PETSC_USE_DEBUGGING) 3952 for (i=0; i<m; i++) { 3953 nnz = Ii[i+1]- Ii[i]; 3954 JJ = J + Ii[i]; 3955 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3956 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 3957 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3958 } 3959 #endif 3960 3961 for (i=0; i<m; i++) { 3962 nnz = Ii[i+1]- Ii[i]; 3963 JJ = J + Ii[i]; 3964 nnz_max = PetscMax(nnz_max,nnz); 3965 d = 0; 3966 for (j=0; j<nnz; j++) { 3967 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3968 } 3969 d_nnz[i] = d; 3970 o_nnz[i] = nnz - d; 3971 } 3972 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3973 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3974 3975 if (v) values = (PetscScalar*)v; 3976 else { 3977 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3978 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3979 } 3980 3981 for (i=0; i<m; i++) { 3982 ii = i + rstart; 3983 nnz = Ii[i+1]- Ii[i]; 3984 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3985 } 3986 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3987 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3988 3989 if (!v) { 3990 ierr = PetscFree(values);CHKERRQ(ierr); 3991 } 3992 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3993 PetscFunctionReturn(0); 3994 } 3995 3996 #undef __FUNCT__ 3997 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3998 /*@ 3999 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 4000 (the default parallel PETSc format). 4001 4002 Collective on MPI_Comm 4003 4004 Input Parameters: 4005 + B - the matrix 4006 . i - the indices into j for the start of each local row (starts with zero) 4007 . j - the column indices for each local row (starts with zero) 4008 - v - optional values in the matrix 4009 4010 Level: developer 4011 4012 Notes: 4013 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4014 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4015 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4016 4017 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4018 4019 The format which is used for the sparse matrix input, is equivalent to a 4020 row-major ordering.. i.e for the following matrix, the input data expected is 4021 as shown: 4022 4023 1 0 0 4024 2 0 3 P0 4025 ------- 4026 4 5 6 P1 4027 4028 Process0 [P0]: rows_owned=[0,1] 4029 i = {0,1,3} [size = nrow+1 = 2+1] 4030 j = {0,0,2} [size = nz = 6] 4031 v = {1,2,3} [size = nz = 6] 4032 4033 Process1 [P1]: rows_owned=[2] 4034 i = {0,3} [size = nrow+1 = 1+1] 4035 j = {0,1,2} [size = nz = 6] 4036 v = {4,5,6} [size = nz = 6] 4037 4038 .keywords: matrix, aij, compressed row, sparse, parallel 4039 4040 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 4041 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 4042 @*/ 4043 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 4044 { 4045 PetscErrorCode ierr; 4046 4047 PetscFunctionBegin; 4048 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 4049 PetscFunctionReturn(0); 4050 } 4051 4052 #undef __FUNCT__ 4053 #define __FUNCT__ "MatMPIAIJSetPreallocation" 4054 /*@C 4055 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 4056 (the default parallel PETSc format). For good matrix assembly performance 4057 the user should preallocate the matrix storage by setting the parameters 4058 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4059 performance can be increased by more than a factor of 50. 4060 4061 Collective on MPI_Comm 4062 4063 Input Parameters: 4064 + A - the matrix 4065 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4066 (same value is used for all local rows) 4067 . d_nnz - array containing the number of nonzeros in the various rows of the 4068 DIAGONAL portion of the local submatrix (possibly different for each row) 4069 or NULL, if d_nz is used to specify the nonzero structure. 4070 The size of this array is equal to the number of local rows, i.e 'm'. 4071 For matrices that will be factored, you must leave room for (and set) 4072 the diagonal entry even if it is zero. 4073 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4074 submatrix (same value is used for all local rows). 4075 - o_nnz - array containing the number of nonzeros in the various rows of the 4076 OFF-DIAGONAL portion of the local submatrix (possibly different for 4077 each row) or NULL, if o_nz is used to specify the nonzero 4078 structure. The size of this array is equal to the number 4079 of local rows, i.e 'm'. 4080 4081 If the *_nnz parameter is given then the *_nz parameter is ignored 4082 4083 The AIJ format (also called the Yale sparse matrix format or 4084 compressed row storage (CSR)), is fully compatible with standard Fortran 77 4085 storage. The stored row and column indices begin with zero. 4086 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 4087 4088 The parallel matrix is partitioned such that the first m0 rows belong to 4089 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4090 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 4091 4092 The DIAGONAL portion of the local submatrix of a processor can be defined 4093 as the submatrix which is obtained by extraction the part corresponding to 4094 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4095 first row that belongs to the processor, r2 is the last row belonging to 4096 the this processor, and c1-c2 is range of indices of the local part of a 4097 vector suitable for applying the matrix to. This is an mxn matrix. In the 4098 common case of a square matrix, the row and column ranges are the same and 4099 the DIAGONAL part is also square. The remaining portion of the local 4100 submatrix (mxN) constitute the OFF-DIAGONAL portion. 4101 4102 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4103 4104 You can call MatGetInfo() to get information on how effective the preallocation was; 4105 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 4106 You can also run with the option -info and look for messages with the string 4107 malloc in them to see if additional memory allocation was needed. 4108 4109 Example usage: 4110 4111 Consider the following 8x8 matrix with 34 non-zero values, that is 4112 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4113 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4114 as follows: 4115 4116 .vb 4117 1 2 0 | 0 3 0 | 0 4 4118 Proc0 0 5 6 | 7 0 0 | 8 0 4119 9 0 10 | 11 0 0 | 12 0 4120 ------------------------------------- 4121 13 0 14 | 15 16 17 | 0 0 4122 Proc1 0 18 0 | 19 20 21 | 0 0 4123 0 0 0 | 22 23 0 | 24 0 4124 ------------------------------------- 4125 Proc2 25 26 27 | 0 0 28 | 29 0 4126 30 0 0 | 31 32 33 | 0 34 4127 .ve 4128 4129 This can be represented as a collection of submatrices as: 4130 4131 .vb 4132 A B C 4133 D E F 4134 G H I 4135 .ve 4136 4137 Where the submatrices A,B,C are owned by proc0, D,E,F are 4138 owned by proc1, G,H,I are owned by proc2. 4139 4140 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4141 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4142 The 'M','N' parameters are 8,8, and have the same values on all procs. 4143 4144 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4145 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4146 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4147 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4148 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4149 matrix, ans [DF] as another SeqAIJ matrix. 4150 4151 When d_nz, o_nz parameters are specified, d_nz storage elements are 4152 allocated for every row of the local diagonal submatrix, and o_nz 4153 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4154 One way to choose d_nz and o_nz is to use the max nonzerors per local 4155 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4156 In this case, the values of d_nz,o_nz are: 4157 .vb 4158 proc0 : dnz = 2, o_nz = 2 4159 proc1 : dnz = 3, o_nz = 2 4160 proc2 : dnz = 1, o_nz = 4 4161 .ve 4162 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4163 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4164 for proc3. i.e we are using 12+15+10=37 storage locations to store 4165 34 values. 4166 4167 When d_nnz, o_nnz parameters are specified, the storage is specified 4168 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4169 In the above case the values for d_nnz,o_nnz are: 4170 .vb 4171 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4172 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4173 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4174 .ve 4175 Here the space allocated is sum of all the above values i.e 34, and 4176 hence pre-allocation is perfect. 4177 4178 Level: intermediate 4179 4180 .keywords: matrix, aij, compressed row, sparse, parallel 4181 4182 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4183 MPIAIJ, MatGetInfo(), PetscSplitOwnership() 4184 @*/ 4185 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4186 { 4187 PetscErrorCode ierr; 4188 4189 PetscFunctionBegin; 4190 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4191 PetscValidType(B,1); 4192 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4193 PetscFunctionReturn(0); 4194 } 4195 4196 #undef __FUNCT__ 4197 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 4198 /*@ 4199 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 4200 CSR format the local rows. 4201 4202 Collective on MPI_Comm 4203 4204 Input Parameters: 4205 + comm - MPI communicator 4206 . m - number of local rows (Cannot be PETSC_DECIDE) 4207 . n - This value should be the same as the local size used in creating the 4208 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4209 calculated if N is given) For square matrices n is almost always m. 4210 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4211 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4212 . i - row indices 4213 . j - column indices 4214 - a - matrix values 4215 4216 Output Parameter: 4217 . mat - the matrix 4218 4219 Level: intermediate 4220 4221 Notes: 4222 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4223 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4224 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4225 4226 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4227 4228 The format which is used for the sparse matrix input, is equivalent to a 4229 row-major ordering.. i.e for the following matrix, the input data expected is 4230 as shown: 4231 4232 1 0 0 4233 2 0 3 P0 4234 ------- 4235 4 5 6 P1 4236 4237 Process0 [P0]: rows_owned=[0,1] 4238 i = {0,1,3} [size = nrow+1 = 2+1] 4239 j = {0,0,2} [size = nz = 6] 4240 v = {1,2,3} [size = nz = 6] 4241 4242 Process1 [P1]: rows_owned=[2] 4243 i = {0,3} [size = nrow+1 = 1+1] 4244 j = {0,1,2} [size = nz = 6] 4245 v = {4,5,6} [size = nz = 6] 4246 4247 .keywords: matrix, aij, compressed row, sparse, parallel 4248 4249 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4250 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4251 @*/ 4252 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4253 { 4254 PetscErrorCode ierr; 4255 4256 PetscFunctionBegin; 4257 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4258 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4259 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4260 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4261 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4262 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4263 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4264 PetscFunctionReturn(0); 4265 } 4266 4267 #undef __FUNCT__ 4268 #define __FUNCT__ "MatCreateAIJ" 4269 /*@C 4270 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4271 (the default parallel PETSc format). For good matrix assembly performance 4272 the user should preallocate the matrix storage by setting the parameters 4273 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4274 performance can be increased by more than a factor of 50. 4275 4276 Collective on MPI_Comm 4277 4278 Input Parameters: 4279 + comm - MPI communicator 4280 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4281 This value should be the same as the local size used in creating the 4282 y vector for the matrix-vector product y = Ax. 4283 . n - This value should be the same as the local size used in creating the 4284 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4285 calculated if N is given) For square matrices n is almost always m. 4286 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4287 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4288 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4289 (same value is used for all local rows) 4290 . d_nnz - array containing the number of nonzeros in the various rows of the 4291 DIAGONAL portion of the local submatrix (possibly different for each row) 4292 or NULL, if d_nz is used to specify the nonzero structure. 4293 The size of this array is equal to the number of local rows, i.e 'm'. 4294 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4295 submatrix (same value is used for all local rows). 4296 - o_nnz - array containing the number of nonzeros in the various rows of the 4297 OFF-DIAGONAL portion of the local submatrix (possibly different for 4298 each row) or NULL, if o_nz is used to specify the nonzero 4299 structure. The size of this array is equal to the number 4300 of local rows, i.e 'm'. 4301 4302 Output Parameter: 4303 . A - the matrix 4304 4305 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4306 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4307 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4308 4309 Notes: 4310 If the *_nnz parameter is given then the *_nz parameter is ignored 4311 4312 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4313 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4314 storage requirements for this matrix. 4315 4316 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4317 processor than it must be used on all processors that share the object for 4318 that argument. 4319 4320 The user MUST specify either the local or global matrix dimensions 4321 (possibly both). 4322 4323 The parallel matrix is partitioned across processors such that the 4324 first m0 rows belong to process 0, the next m1 rows belong to 4325 process 1, the next m2 rows belong to process 2 etc.. where 4326 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4327 values corresponding to [m x N] submatrix. 4328 4329 The columns are logically partitioned with the n0 columns belonging 4330 to 0th partition, the next n1 columns belonging to the next 4331 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 4332 4333 The DIAGONAL portion of the local submatrix on any given processor 4334 is the submatrix corresponding to the rows and columns m,n 4335 corresponding to the given processor. i.e diagonal matrix on 4336 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4337 etc. The remaining portion of the local submatrix [m x (N-n)] 4338 constitute the OFF-DIAGONAL portion. The example below better 4339 illustrates this concept. 4340 4341 For a square global matrix we define each processor's diagonal portion 4342 to be its local rows and the corresponding columns (a square submatrix); 4343 each processor's off-diagonal portion encompasses the remainder of the 4344 local matrix (a rectangular submatrix). 4345 4346 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4347 4348 When calling this routine with a single process communicator, a matrix of 4349 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4350 type of communicator, use the construction mechanism: 4351 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4352 4353 By default, this format uses inodes (identical nodes) when possible. 4354 We search for consecutive rows with the same nonzero structure, thereby 4355 reusing matrix information to achieve increased efficiency. 4356 4357 Options Database Keys: 4358 + -mat_no_inode - Do not use inodes 4359 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4360 - -mat_aij_oneindex - Internally use indexing starting at 1 4361 rather than 0. Note that when calling MatSetValues(), 4362 the user still MUST index entries starting at 0! 4363 4364 4365 Example usage: 4366 4367 Consider the following 8x8 matrix with 34 non-zero values, that is 4368 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4369 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4370 as follows: 4371 4372 .vb 4373 1 2 0 | 0 3 0 | 0 4 4374 Proc0 0 5 6 | 7 0 0 | 8 0 4375 9 0 10 | 11 0 0 | 12 0 4376 ------------------------------------- 4377 13 0 14 | 15 16 17 | 0 0 4378 Proc1 0 18 0 | 19 20 21 | 0 0 4379 0 0 0 | 22 23 0 | 24 0 4380 ------------------------------------- 4381 Proc2 25 26 27 | 0 0 28 | 29 0 4382 30 0 0 | 31 32 33 | 0 34 4383 .ve 4384 4385 This can be represented as a collection of submatrices as: 4386 4387 .vb 4388 A B C 4389 D E F 4390 G H I 4391 .ve 4392 4393 Where the submatrices A,B,C are owned by proc0, D,E,F are 4394 owned by proc1, G,H,I are owned by proc2. 4395 4396 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4397 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4398 The 'M','N' parameters are 8,8, and have the same values on all procs. 4399 4400 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4401 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4402 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4403 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4404 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4405 matrix, ans [DF] as another SeqAIJ matrix. 4406 4407 When d_nz, o_nz parameters are specified, d_nz storage elements are 4408 allocated for every row of the local diagonal submatrix, and o_nz 4409 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4410 One way to choose d_nz and o_nz is to use the max nonzerors per local 4411 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4412 In this case, the values of d_nz,o_nz are: 4413 .vb 4414 proc0 : dnz = 2, o_nz = 2 4415 proc1 : dnz = 3, o_nz = 2 4416 proc2 : dnz = 1, o_nz = 4 4417 .ve 4418 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4419 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4420 for proc3. i.e we are using 12+15+10=37 storage locations to store 4421 34 values. 4422 4423 When d_nnz, o_nnz parameters are specified, the storage is specified 4424 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4425 In the above case the values for d_nnz,o_nnz are: 4426 .vb 4427 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4428 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4429 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4430 .ve 4431 Here the space allocated is sum of all the above values i.e 34, and 4432 hence pre-allocation is perfect. 4433 4434 Level: intermediate 4435 4436 .keywords: matrix, aij, compressed row, sparse, parallel 4437 4438 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4439 MPIAIJ, MatCreateMPIAIJWithArrays() 4440 @*/ 4441 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4442 { 4443 PetscErrorCode ierr; 4444 PetscMPIInt size; 4445 4446 PetscFunctionBegin; 4447 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4448 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4449 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4450 if (size > 1) { 4451 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4452 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4453 } else { 4454 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4455 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4456 } 4457 PetscFunctionReturn(0); 4458 } 4459 4460 #undef __FUNCT__ 4461 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4462 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4463 { 4464 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4465 4466 PetscFunctionBegin; 4467 *Ad = a->A; 4468 *Ao = a->B; 4469 *colmap = a->garray; 4470 PetscFunctionReturn(0); 4471 } 4472 4473 #undef __FUNCT__ 4474 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4475 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4476 { 4477 PetscErrorCode ierr; 4478 PetscInt i; 4479 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4480 4481 PetscFunctionBegin; 4482 if (coloring->ctype == IS_COLORING_GLOBAL) { 4483 ISColoringValue *allcolors,*colors; 4484 ISColoring ocoloring; 4485 4486 /* set coloring for diagonal portion */ 4487 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4488 4489 /* set coloring for off-diagonal portion */ 4490 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 4491 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4492 for (i=0; i<a->B->cmap->n; i++) { 4493 colors[i] = allcolors[a->garray[i]]; 4494 } 4495 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4496 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4497 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4498 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4499 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4500 ISColoringValue *colors; 4501 PetscInt *larray; 4502 ISColoring ocoloring; 4503 4504 /* set coloring for diagonal portion */ 4505 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4506 for (i=0; i<a->A->cmap->n; i++) { 4507 larray[i] = i + A->cmap->rstart; 4508 } 4509 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 4510 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4511 for (i=0; i<a->A->cmap->n; i++) { 4512 colors[i] = coloring->colors[larray[i]]; 4513 } 4514 ierr = PetscFree(larray);CHKERRQ(ierr); 4515 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4516 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4517 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4518 4519 /* set coloring for off-diagonal portion */ 4520 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4521 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 4522 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4523 for (i=0; i<a->B->cmap->n; i++) { 4524 colors[i] = coloring->colors[larray[i]]; 4525 } 4526 ierr = PetscFree(larray);CHKERRQ(ierr); 4527 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4528 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4529 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4530 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4531 PetscFunctionReturn(0); 4532 } 4533 4534 #undef __FUNCT__ 4535 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4536 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4537 { 4538 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4539 PetscErrorCode ierr; 4540 4541 PetscFunctionBegin; 4542 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4543 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4544 PetscFunctionReturn(0); 4545 } 4546 4547 #undef __FUNCT__ 4548 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4549 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4550 { 4551 PetscErrorCode ierr; 4552 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4553 PetscInt *indx; 4554 4555 PetscFunctionBegin; 4556 /* This routine will ONLY return MPIAIJ type matrix */ 4557 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4558 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4559 if (n == PETSC_DECIDE) { 4560 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4561 } 4562 /* Check sum(n) = N */ 4563 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4564 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4565 4566 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4567 rstart -= m; 4568 4569 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4570 for (i=0; i<m; i++) { 4571 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4572 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4573 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4574 } 4575 4576 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4577 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4578 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4579 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4580 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4581 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4582 PetscFunctionReturn(0); 4583 } 4584 4585 #undef __FUNCT__ 4586 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4587 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4588 { 4589 PetscErrorCode ierr; 4590 PetscInt m,N,i,rstart,nnz,Ii; 4591 PetscInt *indx; 4592 PetscScalar *values; 4593 4594 PetscFunctionBegin; 4595 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4596 ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr); 4597 for (i=0; i<m; i++) { 4598 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4599 Ii = i + rstart; 4600 ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4601 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4602 } 4603 ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4604 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4605 PetscFunctionReturn(0); 4606 } 4607 4608 #undef __FUNCT__ 4609 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4610 /*@ 4611 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4612 matrices from each processor 4613 4614 Collective on MPI_Comm 4615 4616 Input Parameters: 4617 + comm - the communicators the parallel matrix will live on 4618 . inmat - the input sequential matrices 4619 . n - number of local columns (or PETSC_DECIDE) 4620 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4621 4622 Output Parameter: 4623 . outmat - the parallel matrix generated 4624 4625 Level: advanced 4626 4627 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4628 4629 @*/ 4630 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4631 { 4632 PetscErrorCode ierr; 4633 4634 PetscFunctionBegin; 4635 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4636 if (scall == MAT_INITIAL_MATRIX) { 4637 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4638 } 4639 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4640 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4641 PetscFunctionReturn(0); 4642 } 4643 4644 #undef __FUNCT__ 4645 #define __FUNCT__ "MatFileSplit" 4646 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4647 { 4648 PetscErrorCode ierr; 4649 PetscMPIInt rank; 4650 PetscInt m,N,i,rstart,nnz; 4651 size_t len; 4652 const PetscInt *indx; 4653 PetscViewer out; 4654 char *name; 4655 Mat B; 4656 const PetscScalar *values; 4657 4658 PetscFunctionBegin; 4659 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4660 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4661 /* Should this be the type of the diagonal block of A? */ 4662 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4663 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4664 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 4665 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4666 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4667 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4668 for (i=0; i<m; i++) { 4669 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4670 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4671 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4672 } 4673 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4674 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4675 4676 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4677 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4678 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4679 sprintf(name,"%s.%d",outfile,rank); 4680 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4681 ierr = PetscFree(name);CHKERRQ(ierr); 4682 ierr = MatView(B,out);CHKERRQ(ierr); 4683 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4684 ierr = MatDestroy(&B);CHKERRQ(ierr); 4685 PetscFunctionReturn(0); 4686 } 4687 4688 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4689 #undef __FUNCT__ 4690 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4691 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4692 { 4693 PetscErrorCode ierr; 4694 Mat_Merge_SeqsToMPI *merge; 4695 PetscContainer container; 4696 4697 PetscFunctionBegin; 4698 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4699 if (container) { 4700 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4701 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4702 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4703 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4704 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4705 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4706 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4707 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4708 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4709 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4710 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4711 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4712 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4713 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4714 ierr = PetscFree(merge);CHKERRQ(ierr); 4715 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4716 } 4717 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4718 PetscFunctionReturn(0); 4719 } 4720 4721 #include <../src/mat/utils/freespace.h> 4722 #include <petscbt.h> 4723 4724 #undef __FUNCT__ 4725 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4726 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4727 { 4728 PetscErrorCode ierr; 4729 MPI_Comm comm; 4730 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4731 PetscMPIInt size,rank,taga,*len_s; 4732 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4733 PetscInt proc,m; 4734 PetscInt **buf_ri,**buf_rj; 4735 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4736 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4737 MPI_Request *s_waits,*r_waits; 4738 MPI_Status *status; 4739 MatScalar *aa=a->a; 4740 MatScalar **abuf_r,*ba_i; 4741 Mat_Merge_SeqsToMPI *merge; 4742 PetscContainer container; 4743 4744 PetscFunctionBegin; 4745 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4746 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4747 4748 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4749 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4750 4751 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4752 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4753 4754 bi = merge->bi; 4755 bj = merge->bj; 4756 buf_ri = merge->buf_ri; 4757 buf_rj = merge->buf_rj; 4758 4759 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4760 owners = merge->rowmap->range; 4761 len_s = merge->len_s; 4762 4763 /* send and recv matrix values */ 4764 /*-----------------------------*/ 4765 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4766 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4767 4768 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4769 for (proc=0,k=0; proc<size; proc++) { 4770 if (!len_s[proc]) continue; 4771 i = owners[proc]; 4772 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4773 k++; 4774 } 4775 4776 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4777 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4778 ierr = PetscFree(status);CHKERRQ(ierr); 4779 4780 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4781 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4782 4783 /* insert mat values of mpimat */ 4784 /*----------------------------*/ 4785 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4786 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4787 4788 for (k=0; k<merge->nrecv; k++) { 4789 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4790 nrows = *(buf_ri_k[k]); 4791 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4792 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4793 } 4794 4795 /* set values of ba */ 4796 m = merge->rowmap->n; 4797 for (i=0; i<m; i++) { 4798 arow = owners[rank] + i; 4799 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4800 bnzi = bi[i+1] - bi[i]; 4801 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4802 4803 /* add local non-zero vals of this proc's seqmat into ba */ 4804 anzi = ai[arow+1] - ai[arow]; 4805 aj = a->j + ai[arow]; 4806 aa = a->a + ai[arow]; 4807 nextaj = 0; 4808 for (j=0; nextaj<anzi; j++) { 4809 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4810 ba_i[j] += aa[nextaj++]; 4811 } 4812 } 4813 4814 /* add received vals into ba */ 4815 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4816 /* i-th row */ 4817 if (i == *nextrow[k]) { 4818 anzi = *(nextai[k]+1) - *nextai[k]; 4819 aj = buf_rj[k] + *(nextai[k]); 4820 aa = abuf_r[k] + *(nextai[k]); 4821 nextaj = 0; 4822 for (j=0; nextaj<anzi; j++) { 4823 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4824 ba_i[j] += aa[nextaj++]; 4825 } 4826 } 4827 nextrow[k]++; nextai[k]++; 4828 } 4829 } 4830 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4831 } 4832 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4833 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4834 4835 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4836 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4837 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4838 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4839 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4840 PetscFunctionReturn(0); 4841 } 4842 4843 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4844 4845 #undef __FUNCT__ 4846 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4847 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4848 { 4849 PetscErrorCode ierr; 4850 Mat B_mpi; 4851 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4852 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4853 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4854 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4855 PetscInt len,proc,*dnz,*onz,bs,cbs; 4856 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4857 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4858 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4859 MPI_Status *status; 4860 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4861 PetscBT lnkbt; 4862 Mat_Merge_SeqsToMPI *merge; 4863 PetscContainer container; 4864 4865 PetscFunctionBegin; 4866 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4867 4868 /* make sure it is a PETSc comm */ 4869 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4870 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4871 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4872 4873 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4874 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4875 4876 /* determine row ownership */ 4877 /*---------------------------------------------------------*/ 4878 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4879 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4880 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4881 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4882 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4883 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4884 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4885 4886 m = merge->rowmap->n; 4887 owners = merge->rowmap->range; 4888 4889 /* determine the number of messages to send, their lengths */ 4890 /*---------------------------------------------------------*/ 4891 len_s = merge->len_s; 4892 4893 len = 0; /* length of buf_si[] */ 4894 merge->nsend = 0; 4895 for (proc=0; proc<size; proc++) { 4896 len_si[proc] = 0; 4897 if (proc == rank) { 4898 len_s[proc] = 0; 4899 } else { 4900 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4901 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4902 } 4903 if (len_s[proc]) { 4904 merge->nsend++; 4905 nrows = 0; 4906 for (i=owners[proc]; i<owners[proc+1]; i++) { 4907 if (ai[i+1] > ai[i]) nrows++; 4908 } 4909 len_si[proc] = 2*(nrows+1); 4910 len += len_si[proc]; 4911 } 4912 } 4913 4914 /* determine the number and length of messages to receive for ij-structure */ 4915 /*-------------------------------------------------------------------------*/ 4916 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4917 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4918 4919 /* post the Irecv of j-structure */ 4920 /*-------------------------------*/ 4921 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4922 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4923 4924 /* post the Isend of j-structure */ 4925 /*--------------------------------*/ 4926 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4927 4928 for (proc=0, k=0; proc<size; proc++) { 4929 if (!len_s[proc]) continue; 4930 i = owners[proc]; 4931 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4932 k++; 4933 } 4934 4935 /* receives and sends of j-structure are complete */ 4936 /*------------------------------------------------*/ 4937 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4938 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4939 4940 /* send and recv i-structure */ 4941 /*---------------------------*/ 4942 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4943 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4944 4945 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4946 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4947 for (proc=0,k=0; proc<size; proc++) { 4948 if (!len_s[proc]) continue; 4949 /* form outgoing message for i-structure: 4950 buf_si[0]: nrows to be sent 4951 [1:nrows]: row index (global) 4952 [nrows+1:2*nrows+1]: i-structure index 4953 */ 4954 /*-------------------------------------------*/ 4955 nrows = len_si[proc]/2 - 1; 4956 buf_si_i = buf_si + nrows+1; 4957 buf_si[0] = nrows; 4958 buf_si_i[0] = 0; 4959 nrows = 0; 4960 for (i=owners[proc]; i<owners[proc+1]; i++) { 4961 anzi = ai[i+1] - ai[i]; 4962 if (anzi) { 4963 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4964 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4965 nrows++; 4966 } 4967 } 4968 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4969 k++; 4970 buf_si += len_si[proc]; 4971 } 4972 4973 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4974 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4975 4976 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4977 for (i=0; i<merge->nrecv; i++) { 4978 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4979 } 4980 4981 ierr = PetscFree(len_si);CHKERRQ(ierr); 4982 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4983 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4984 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4985 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4986 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4987 ierr = PetscFree(status);CHKERRQ(ierr); 4988 4989 /* compute a local seq matrix in each processor */ 4990 /*----------------------------------------------*/ 4991 /* allocate bi array and free space for accumulating nonzero column info */ 4992 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4993 bi[0] = 0; 4994 4995 /* create and initialize a linked list */ 4996 nlnk = N+1; 4997 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4998 4999 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 5000 len = ai[owners[rank+1]] - ai[owners[rank]]; 5001 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 5002 5003 current_space = free_space; 5004 5005 /* determine symbolic info for each local row */ 5006 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 5007 5008 for (k=0; k<merge->nrecv; k++) { 5009 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 5010 nrows = *buf_ri_k[k]; 5011 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 5012 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 5013 } 5014 5015 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 5016 len = 0; 5017 for (i=0; i<m; i++) { 5018 bnzi = 0; 5019 /* add local non-zero cols of this proc's seqmat into lnk */ 5020 arow = owners[rank] + i; 5021 anzi = ai[arow+1] - ai[arow]; 5022 aj = a->j + ai[arow]; 5023 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 5024 bnzi += nlnk; 5025 /* add received col data into lnk */ 5026 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 5027 if (i == *nextrow[k]) { /* i-th row */ 5028 anzi = *(nextai[k]+1) - *nextai[k]; 5029 aj = buf_rj[k] + *nextai[k]; 5030 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 5031 bnzi += nlnk; 5032 nextrow[k]++; nextai[k]++; 5033 } 5034 } 5035 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 5036 5037 /* if free space is not available, make more free space */ 5038 if (current_space->local_remaining<bnzi) { 5039 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 5040 nspacedouble++; 5041 } 5042 /* copy data into free space, then initialize lnk */ 5043 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 5044 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 5045 5046 current_space->array += bnzi; 5047 current_space->local_used += bnzi; 5048 current_space->local_remaining -= bnzi; 5049 5050 bi[i+1] = bi[i] + bnzi; 5051 } 5052 5053 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 5054 5055 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 5056 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 5057 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 5058 5059 /* create symbolic parallel matrix B_mpi */ 5060 /*---------------------------------------*/ 5061 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 5062 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 5063 if (n==PETSC_DECIDE) { 5064 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 5065 } else { 5066 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5067 } 5068 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 5069 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 5070 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 5071 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 5072 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5073 5074 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5075 B_mpi->assembled = PETSC_FALSE; 5076 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 5077 merge->bi = bi; 5078 merge->bj = bj; 5079 merge->buf_ri = buf_ri; 5080 merge->buf_rj = buf_rj; 5081 merge->coi = NULL; 5082 merge->coj = NULL; 5083 merge->owners_co = NULL; 5084 5085 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 5086 5087 /* attach the supporting struct to B_mpi for reuse */ 5088 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 5089 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 5090 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 5091 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 5092 *mpimat = B_mpi; 5093 5094 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 5095 PetscFunctionReturn(0); 5096 } 5097 5098 #undef __FUNCT__ 5099 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 5100 /*@C 5101 MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential 5102 matrices from each processor 5103 5104 Collective on MPI_Comm 5105 5106 Input Parameters: 5107 + comm - the communicators the parallel matrix will live on 5108 . seqmat - the input sequential matrices 5109 . m - number of local rows (or PETSC_DECIDE) 5110 . n - number of local columns (or PETSC_DECIDE) 5111 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5112 5113 Output Parameter: 5114 . mpimat - the parallel matrix generated 5115 5116 Level: advanced 5117 5118 Notes: 5119 The dimensions of the sequential matrix in each processor MUST be the same. 5120 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5121 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 5122 @*/ 5123 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 5124 { 5125 PetscErrorCode ierr; 5126 PetscMPIInt size; 5127 5128 PetscFunctionBegin; 5129 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5130 if (size == 1) { 5131 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5132 if (scall == MAT_INITIAL_MATRIX) { 5133 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 5134 } else { 5135 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5136 } 5137 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5138 PetscFunctionReturn(0); 5139 } 5140 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5141 if (scall == MAT_INITIAL_MATRIX) { 5142 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 5143 } 5144 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 5145 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5146 PetscFunctionReturn(0); 5147 } 5148 5149 #undef __FUNCT__ 5150 #define __FUNCT__ "MatMPIAIJGetLocalMat" 5151 /*@ 5152 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with 5153 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5154 with MatGetSize() 5155 5156 Not Collective 5157 5158 Input Parameters: 5159 + A - the matrix 5160 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5161 5162 Output Parameter: 5163 . A_loc - the local sequential matrix generated 5164 5165 Level: developer 5166 5167 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 5168 5169 @*/ 5170 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5171 { 5172 PetscErrorCode ierr; 5173 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5174 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 5175 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 5176 MatScalar *aa=a->a,*ba=b->a,*cam; 5177 PetscScalar *ca; 5178 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5179 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5180 PetscBool match; 5181 5182 PetscFunctionBegin; 5183 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5184 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5185 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5186 if (scall == MAT_INITIAL_MATRIX) { 5187 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 5188 ci[0] = 0; 5189 for (i=0; i<am; i++) { 5190 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5191 } 5192 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 5193 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 5194 k = 0; 5195 for (i=0; i<am; i++) { 5196 ncols_o = bi[i+1] - bi[i]; 5197 ncols_d = ai[i+1] - ai[i]; 5198 /* off-diagonal portion of A */ 5199 for (jo=0; jo<ncols_o; jo++) { 5200 col = cmap[*bj]; 5201 if (col >= cstart) break; 5202 cj[k] = col; bj++; 5203 ca[k++] = *ba++; 5204 } 5205 /* diagonal portion of A */ 5206 for (j=0; j<ncols_d; j++) { 5207 cj[k] = cstart + *aj++; 5208 ca[k++] = *aa++; 5209 } 5210 /* off-diagonal portion of A */ 5211 for (j=jo; j<ncols_o; j++) { 5212 cj[k] = cmap[*bj++]; 5213 ca[k++] = *ba++; 5214 } 5215 } 5216 /* put together the new matrix */ 5217 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5218 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5219 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5220 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5221 mat->free_a = PETSC_TRUE; 5222 mat->free_ij = PETSC_TRUE; 5223 mat->nonew = 0; 5224 } else if (scall == MAT_REUSE_MATRIX) { 5225 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5226 ci = mat->i; cj = mat->j; cam = mat->a; 5227 for (i=0; i<am; i++) { 5228 /* off-diagonal portion of A */ 5229 ncols_o = bi[i+1] - bi[i]; 5230 for (jo=0; jo<ncols_o; jo++) { 5231 col = cmap[*bj]; 5232 if (col >= cstart) break; 5233 *cam++ = *ba++; bj++; 5234 } 5235 /* diagonal portion of A */ 5236 ncols_d = ai[i+1] - ai[i]; 5237 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5238 /* off-diagonal portion of A */ 5239 for (j=jo; j<ncols_o; j++) { 5240 *cam++ = *ba++; bj++; 5241 } 5242 } 5243 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5244 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5245 PetscFunctionReturn(0); 5246 } 5247 5248 #undef __FUNCT__ 5249 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5250 /*@C 5251 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5252 5253 Not Collective 5254 5255 Input Parameters: 5256 + A - the matrix 5257 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5258 - row, col - index sets of rows and columns to extract (or NULL) 5259 5260 Output Parameter: 5261 . A_loc - the local sequential matrix generated 5262 5263 Level: developer 5264 5265 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5266 5267 @*/ 5268 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5269 { 5270 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5271 PetscErrorCode ierr; 5272 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5273 IS isrowa,iscola; 5274 Mat *aloc; 5275 PetscBool match; 5276 5277 PetscFunctionBegin; 5278 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5279 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5280 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5281 if (!row) { 5282 start = A->rmap->rstart; end = A->rmap->rend; 5283 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5284 } else { 5285 isrowa = *row; 5286 } 5287 if (!col) { 5288 start = A->cmap->rstart; 5289 cmap = a->garray; 5290 nzA = a->A->cmap->n; 5291 nzB = a->B->cmap->n; 5292 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5293 ncols = 0; 5294 for (i=0; i<nzB; i++) { 5295 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5296 else break; 5297 } 5298 imark = i; 5299 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5300 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5301 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5302 } else { 5303 iscola = *col; 5304 } 5305 if (scall != MAT_INITIAL_MATRIX) { 5306 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5307 aloc[0] = *A_loc; 5308 } 5309 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5310 *A_loc = aloc[0]; 5311 ierr = PetscFree(aloc);CHKERRQ(ierr); 5312 if (!row) { 5313 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5314 } 5315 if (!col) { 5316 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5317 } 5318 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5319 PetscFunctionReturn(0); 5320 } 5321 5322 #undef __FUNCT__ 5323 #define __FUNCT__ "MatGetBrowsOfAcols" 5324 /*@C 5325 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5326 5327 Collective on Mat 5328 5329 Input Parameters: 5330 + A,B - the matrices in mpiaij format 5331 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5332 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5333 5334 Output Parameter: 5335 + rowb, colb - index sets of rows and columns of B to extract 5336 - B_seq - the sequential matrix generated 5337 5338 Level: developer 5339 5340 @*/ 5341 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5342 { 5343 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5344 PetscErrorCode ierr; 5345 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5346 IS isrowb,iscolb; 5347 Mat *bseq=NULL; 5348 5349 PetscFunctionBegin; 5350 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5351 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5352 } 5353 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5354 5355 if (scall == MAT_INITIAL_MATRIX) { 5356 start = A->cmap->rstart; 5357 cmap = a->garray; 5358 nzA = a->A->cmap->n; 5359 nzB = a->B->cmap->n; 5360 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5361 ncols = 0; 5362 for (i=0; i<nzB; i++) { /* row < local row index */ 5363 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5364 else break; 5365 } 5366 imark = i; 5367 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5368 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5369 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5370 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5371 } else { 5372 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5373 isrowb = *rowb; iscolb = *colb; 5374 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5375 bseq[0] = *B_seq; 5376 } 5377 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5378 *B_seq = bseq[0]; 5379 ierr = PetscFree(bseq);CHKERRQ(ierr); 5380 if (!rowb) { 5381 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5382 } else { 5383 *rowb = isrowb; 5384 } 5385 if (!colb) { 5386 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5387 } else { 5388 *colb = iscolb; 5389 } 5390 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5391 PetscFunctionReturn(0); 5392 } 5393 5394 #undef __FUNCT__ 5395 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5396 /* 5397 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5398 of the OFF-DIAGONAL portion of local A 5399 5400 Collective on Mat 5401 5402 Input Parameters: 5403 + A,B - the matrices in mpiaij format 5404 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5405 5406 Output Parameter: 5407 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5408 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5409 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5410 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5411 5412 Level: developer 5413 5414 */ 5415 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5416 { 5417 VecScatter_MPI_General *gen_to,*gen_from; 5418 PetscErrorCode ierr; 5419 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5420 Mat_SeqAIJ *b_oth; 5421 VecScatter ctx =a->Mvctx; 5422 MPI_Comm comm; 5423 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5424 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5425 PetscScalar *rvalues,*svalues; 5426 MatScalar *b_otha,*bufa,*bufA; 5427 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5428 MPI_Request *rwaits = NULL,*swaits = NULL; 5429 MPI_Status *sstatus,rstatus; 5430 PetscMPIInt jj; 5431 PetscInt *cols,sbs,rbs; 5432 PetscScalar *vals; 5433 5434 PetscFunctionBegin; 5435 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5436 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5437 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5438 } 5439 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5440 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5441 5442 gen_to = (VecScatter_MPI_General*)ctx->todata; 5443 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5444 rvalues = gen_from->values; /* holds the length of receiving row */ 5445 svalues = gen_to->values; /* holds the length of sending row */ 5446 nrecvs = gen_from->n; 5447 nsends = gen_to->n; 5448 5449 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5450 srow = gen_to->indices; /* local row index to be sent */ 5451 sstarts = gen_to->starts; 5452 sprocs = gen_to->procs; 5453 sstatus = gen_to->sstatus; 5454 sbs = gen_to->bs; 5455 rstarts = gen_from->starts; 5456 rprocs = gen_from->procs; 5457 rbs = gen_from->bs; 5458 5459 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5460 if (scall == MAT_INITIAL_MATRIX) { 5461 /* i-array */ 5462 /*---------*/ 5463 /* post receives */ 5464 for (i=0; i<nrecvs; i++) { 5465 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5466 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5467 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5468 } 5469 5470 /* pack the outgoing message */ 5471 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5472 5473 sstartsj[0] = 0; 5474 rstartsj[0] = 0; 5475 len = 0; /* total length of j or a array to be sent */ 5476 k = 0; 5477 for (i=0; i<nsends; i++) { 5478 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5479 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5480 for (j=0; j<nrows; j++) { 5481 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5482 for (l=0; l<sbs; l++) { 5483 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5484 5485 rowlen[j*sbs+l] = ncols; 5486 5487 len += ncols; 5488 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5489 } 5490 k++; 5491 } 5492 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5493 5494 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5495 } 5496 /* recvs and sends of i-array are completed */ 5497 i = nrecvs; 5498 while (i--) { 5499 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5500 } 5501 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5502 5503 /* allocate buffers for sending j and a arrays */ 5504 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5505 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5506 5507 /* create i-array of B_oth */ 5508 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5509 5510 b_othi[0] = 0; 5511 len = 0; /* total length of j or a array to be received */ 5512 k = 0; 5513 for (i=0; i<nrecvs; i++) { 5514 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5515 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5516 for (j=0; j<nrows; j++) { 5517 b_othi[k+1] = b_othi[k] + rowlen[j]; 5518 len += rowlen[j]; k++; 5519 } 5520 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5521 } 5522 5523 /* allocate space for j and a arrrays of B_oth */ 5524 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5525 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5526 5527 /* j-array */ 5528 /*---------*/ 5529 /* post receives of j-array */ 5530 for (i=0; i<nrecvs; i++) { 5531 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5532 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5533 } 5534 5535 /* pack the outgoing message j-array */ 5536 k = 0; 5537 for (i=0; i<nsends; i++) { 5538 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5539 bufJ = bufj+sstartsj[i]; 5540 for (j=0; j<nrows; j++) { 5541 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5542 for (ll=0; ll<sbs; ll++) { 5543 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5544 for (l=0; l<ncols; l++) { 5545 *bufJ++ = cols[l]; 5546 } 5547 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5548 } 5549 } 5550 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5551 } 5552 5553 /* recvs and sends of j-array are completed */ 5554 i = nrecvs; 5555 while (i--) { 5556 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5557 } 5558 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5559 } else if (scall == MAT_REUSE_MATRIX) { 5560 sstartsj = *startsj_s; 5561 rstartsj = *startsj_r; 5562 bufa = *bufa_ptr; 5563 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5564 b_otha = b_oth->a; 5565 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5566 5567 /* a-array */ 5568 /*---------*/ 5569 /* post receives of a-array */ 5570 for (i=0; i<nrecvs; i++) { 5571 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5572 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5573 } 5574 5575 /* pack the outgoing message a-array */ 5576 k = 0; 5577 for (i=0; i<nsends; i++) { 5578 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5579 bufA = bufa+sstartsj[i]; 5580 for (j=0; j<nrows; j++) { 5581 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5582 for (ll=0; ll<sbs; ll++) { 5583 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5584 for (l=0; l<ncols; l++) { 5585 *bufA++ = vals[l]; 5586 } 5587 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5588 } 5589 } 5590 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5591 } 5592 /* recvs and sends of a-array are completed */ 5593 i = nrecvs; 5594 while (i--) { 5595 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5596 } 5597 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5598 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5599 5600 if (scall == MAT_INITIAL_MATRIX) { 5601 /* put together the new matrix */ 5602 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5603 5604 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5605 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5606 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5607 b_oth->free_a = PETSC_TRUE; 5608 b_oth->free_ij = PETSC_TRUE; 5609 b_oth->nonew = 0; 5610 5611 ierr = PetscFree(bufj);CHKERRQ(ierr); 5612 if (!startsj_s || !bufa_ptr) { 5613 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5614 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5615 } else { 5616 *startsj_s = sstartsj; 5617 *startsj_r = rstartsj; 5618 *bufa_ptr = bufa; 5619 } 5620 } 5621 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5622 PetscFunctionReturn(0); 5623 } 5624 5625 #undef __FUNCT__ 5626 #define __FUNCT__ "MatGetCommunicationStructs" 5627 /*@C 5628 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5629 5630 Not Collective 5631 5632 Input Parameters: 5633 . A - The matrix in mpiaij format 5634 5635 Output Parameter: 5636 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5637 . colmap - A map from global column index to local index into lvec 5638 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5639 5640 Level: developer 5641 5642 @*/ 5643 #if defined(PETSC_USE_CTABLE) 5644 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5645 #else 5646 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5647 #endif 5648 { 5649 Mat_MPIAIJ *a; 5650 5651 PetscFunctionBegin; 5652 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5653 PetscValidPointer(lvec, 2); 5654 PetscValidPointer(colmap, 3); 5655 PetscValidPointer(multScatter, 4); 5656 a = (Mat_MPIAIJ*) A->data; 5657 if (lvec) *lvec = a->lvec; 5658 if (colmap) *colmap = a->colmap; 5659 if (multScatter) *multScatter = a->Mvctx; 5660 PetscFunctionReturn(0); 5661 } 5662 5663 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5664 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5665 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5666 5667 #undef __FUNCT__ 5668 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5669 /* 5670 Computes (B'*A')' since computing B*A directly is untenable 5671 5672 n p p 5673 ( ) ( ) ( ) 5674 m ( A ) * n ( B ) = m ( C ) 5675 ( ) ( ) ( ) 5676 5677 */ 5678 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5679 { 5680 PetscErrorCode ierr; 5681 Mat At,Bt,Ct; 5682 5683 PetscFunctionBegin; 5684 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5685 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5686 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5687 ierr = MatDestroy(&At);CHKERRQ(ierr); 5688 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5689 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5690 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5691 PetscFunctionReturn(0); 5692 } 5693 5694 #undef __FUNCT__ 5695 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5696 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5697 { 5698 PetscErrorCode ierr; 5699 PetscInt m=A->rmap->n,n=B->cmap->n; 5700 Mat Cmat; 5701 5702 PetscFunctionBegin; 5703 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5704 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5705 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5706 ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 5707 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5708 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5709 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5710 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5711 5712 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5713 5714 *C = Cmat; 5715 PetscFunctionReturn(0); 5716 } 5717 5718 /* ----------------------------------------------------------------*/ 5719 #undef __FUNCT__ 5720 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5721 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5722 { 5723 PetscErrorCode ierr; 5724 5725 PetscFunctionBegin; 5726 if (scall == MAT_INITIAL_MATRIX) { 5727 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5728 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5729 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5730 } 5731 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5732 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5733 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5734 PetscFunctionReturn(0); 5735 } 5736 5737 #if defined(PETSC_HAVE_MUMPS) 5738 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5739 #endif 5740 #if defined(PETSC_HAVE_PASTIX) 5741 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5742 #endif 5743 #if defined(PETSC_HAVE_SUPERLU_DIST) 5744 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5745 #endif 5746 #if defined(PETSC_HAVE_CLIQUE) 5747 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5748 #endif 5749 5750 /*MC 5751 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5752 5753 Options Database Keys: 5754 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5755 5756 Level: beginner 5757 5758 .seealso: MatCreateAIJ() 5759 M*/ 5760 5761 #undef __FUNCT__ 5762 #define __FUNCT__ "MatCreate_MPIAIJ" 5763 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5764 { 5765 Mat_MPIAIJ *b; 5766 PetscErrorCode ierr; 5767 PetscMPIInt size; 5768 5769 PetscFunctionBegin; 5770 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5771 5772 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5773 B->data = (void*)b; 5774 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5775 B->assembled = PETSC_FALSE; 5776 B->insertmode = NOT_SET_VALUES; 5777 b->size = size; 5778 5779 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5780 5781 /* build cache for off array entries formed */ 5782 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5783 5784 b->donotstash = PETSC_FALSE; 5785 b->colmap = 0; 5786 b->garray = 0; 5787 b->roworiented = PETSC_TRUE; 5788 5789 /* stuff used for matrix vector multiply */ 5790 b->lvec = NULL; 5791 b->Mvctx = NULL; 5792 5793 /* stuff for MatGetRow() */ 5794 b->rowindices = 0; 5795 b->rowvalues = 0; 5796 b->getrowactive = PETSC_FALSE; 5797 5798 /* flexible pointer used in CUSP/CUSPARSE classes */ 5799 b->spptr = NULL; 5800 5801 #if defined(PETSC_HAVE_MUMPS) 5802 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr); 5803 #endif 5804 #if defined(PETSC_HAVE_PASTIX) 5805 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5806 #endif 5807 #if defined(PETSC_HAVE_SUPERLU_DIST) 5808 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5809 #endif 5810 #if defined(PETSC_HAVE_CLIQUE) 5811 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr); 5812 #endif 5813 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5814 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5815 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5816 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5817 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5818 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5819 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5820 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5821 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5822 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5823 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5824 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5825 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5826 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5827 PetscFunctionReturn(0); 5828 } 5829 5830 #undef __FUNCT__ 5831 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5832 /*@ 5833 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5834 and "off-diagonal" part of the matrix in CSR format. 5835 5836 Collective on MPI_Comm 5837 5838 Input Parameters: 5839 + comm - MPI communicator 5840 . m - number of local rows (Cannot be PETSC_DECIDE) 5841 . n - This value should be the same as the local size used in creating the 5842 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5843 calculated if N is given) For square matrices n is almost always m. 5844 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5845 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5846 . i - row indices for "diagonal" portion of matrix 5847 . j - column indices 5848 . a - matrix values 5849 . oi - row indices for "off-diagonal" portion of matrix 5850 . oj - column indices 5851 - oa - matrix values 5852 5853 Output Parameter: 5854 . mat - the matrix 5855 5856 Level: advanced 5857 5858 Notes: 5859 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5860 must free the arrays once the matrix has been destroyed and not before. 5861 5862 The i and j indices are 0 based 5863 5864 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5865 5866 This sets local rows and cannot be used to set off-processor values. 5867 5868 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5869 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5870 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5871 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5872 keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5873 communication if it is known that only local entries will be set. 5874 5875 .keywords: matrix, aij, compressed row, sparse, parallel 5876 5877 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5878 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5879 @*/ 5880 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5881 { 5882 PetscErrorCode ierr; 5883 Mat_MPIAIJ *maij; 5884 5885 PetscFunctionBegin; 5886 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5887 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5888 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5889 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5890 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5891 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5892 maij = (Mat_MPIAIJ*) (*mat)->data; 5893 5894 (*mat)->preallocated = PETSC_TRUE; 5895 5896 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5897 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5898 5899 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5900 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5901 5902 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5903 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5904 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5905 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5906 5907 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5908 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5909 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5910 PetscFunctionReturn(0); 5911 } 5912 5913 /* 5914 Special version for direct calls from Fortran 5915 */ 5916 #include <petsc-private/fortranimpl.h> 5917 5918 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5919 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5920 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5921 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5922 #endif 5923 5924 /* Change these macros so can be used in void function */ 5925 #undef CHKERRQ 5926 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5927 #undef SETERRQ2 5928 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5929 #undef SETERRQ3 5930 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5931 #undef SETERRQ 5932 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5933 5934 #undef __FUNCT__ 5935 #define __FUNCT__ "matsetvaluesmpiaij_" 5936 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5937 { 5938 Mat mat = *mmat; 5939 PetscInt m = *mm, n = *mn; 5940 InsertMode addv = *maddv; 5941 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5942 PetscScalar value; 5943 PetscErrorCode ierr; 5944 5945 MatCheckPreallocated(mat,1); 5946 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5947 5948 #if defined(PETSC_USE_DEBUG) 5949 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5950 #endif 5951 { 5952 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5953 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5954 PetscBool roworiented = aij->roworiented; 5955 5956 /* Some Variables required in the macro */ 5957 Mat A = aij->A; 5958 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5959 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5960 MatScalar *aa = a->a; 5961 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5962 Mat B = aij->B; 5963 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5964 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5965 MatScalar *ba = b->a; 5966 5967 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5968 PetscInt nonew = a->nonew; 5969 MatScalar *ap1,*ap2; 5970 5971 PetscFunctionBegin; 5972 for (i=0; i<m; i++) { 5973 if (im[i] < 0) continue; 5974 #if defined(PETSC_USE_DEBUG) 5975 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5976 #endif 5977 if (im[i] >= rstart && im[i] < rend) { 5978 row = im[i] - rstart; 5979 lastcol1 = -1; 5980 rp1 = aj + ai[row]; 5981 ap1 = aa + ai[row]; 5982 rmax1 = aimax[row]; 5983 nrow1 = ailen[row]; 5984 low1 = 0; 5985 high1 = nrow1; 5986 lastcol2 = -1; 5987 rp2 = bj + bi[row]; 5988 ap2 = ba + bi[row]; 5989 rmax2 = bimax[row]; 5990 nrow2 = bilen[row]; 5991 low2 = 0; 5992 high2 = nrow2; 5993 5994 for (j=0; j<n; j++) { 5995 if (roworiented) value = v[i*n+j]; 5996 else value = v[i+j*m]; 5997 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5998 if (in[j] >= cstart && in[j] < cend) { 5999 col = in[j] - cstart; 6000 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 6001 } else if (in[j] < 0) continue; 6002 #if defined(PETSC_USE_DEBUG) 6003 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 6004 #endif 6005 else { 6006 if (mat->was_assembled) { 6007 if (!aij->colmap) { 6008 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 6009 } 6010 #if defined(PETSC_USE_CTABLE) 6011 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 6012 col--; 6013 #else 6014 col = aij->colmap[in[j]] - 1; 6015 #endif 6016 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 6017 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 6018 col = in[j]; 6019 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 6020 B = aij->B; 6021 b = (Mat_SeqAIJ*)B->data; 6022 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 6023 rp2 = bj + bi[row]; 6024 ap2 = ba + bi[row]; 6025 rmax2 = bimax[row]; 6026 nrow2 = bilen[row]; 6027 low2 = 0; 6028 high2 = nrow2; 6029 bm = aij->B->rmap->n; 6030 ba = b->a; 6031 } 6032 } else col = in[j]; 6033 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 6034 } 6035 } 6036 } else if (!aij->donotstash) { 6037 if (roworiented) { 6038 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6039 } else { 6040 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6041 } 6042 } 6043 } 6044 } 6045 PetscFunctionReturnVoid(); 6046 } 6047 6048