#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc-private/vecimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also automatically
   switches over to use inodes when enough consecutive rows with identical nonzero structure exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
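
/*
   Example (a minimal sketch, not code from this file): the routine above is
   normally reached through the public interface, which returns a NULL IS when
   no row of the matrix is entirely zero.

     IS keptrows;
     ierr = MatFindNonzeroRows(A,&keptrows);CHKERRQ(ierr);
     if (keptrows) {
       ierr = ISDestroy(&keptrows);CHKERRQ(ierr);
     }
*/
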
#undef __FUNCT__
#define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}
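
/*
   Example (a minimal sketch, not code from this file): the public entry point
   for the routine above; norms must have room for one entry per global column.

     PetscReal *norms;
     PetscInt  N;
     ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
     ierr = PetscMalloc1(N,&norms);CHKERRQ(ierr);
     ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
     ierr = PetscFree(norms);CHKERRQ(ierr);
*/
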
#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    Used by a preconditioner, hence PETSC_EXTERN
*/
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal nonzeros in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal nonzeros in each row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else { /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
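
/*
   Example (a minimal sketch, not code from this file): distributing a square
   SeqAIJ matrix gmat that is significant only on rank 0; here m is this
   process's share of the rows (any partition whose local sizes sum to the
   global size works).

     Mat dmat;
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);
     ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,m,MAT_REUSE_MATRIX,&dmat);CHKERRQ(ierr);

   The second call moves over only the numerical values from process 0.
*/
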
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each
  process has an order-N integer array) but it is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
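
/*
   Example (a minimal sketch, mirroring the lookups done in MatSetValues_MPIAIJ()
   and MatGetValues_MPIAIJ() below): translating a global column number gcol
   into its local index in the off-diagonal part once the map above has been
   built. Keys and values are shifted by one so that zero can mean "absent".

     PetscInt lcol;
   #if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif

   After this, lcol < 0 indicates gcol does not occur in the off-diagonal part.
*/
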
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1)  low1 = 0;     \
    else                 high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value;   \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    }  \
    if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col;  \
    ap1[_i] = value;  \
    A->nonzerostate++;\
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2)  low2 = 0;     \
    else                 high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2  = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col;  \
    ap2[_i] = value;  \
    B->nonzerostate++; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}
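
/*
   The two macros above implement the same search-then-insert: a binary search
   narrows the sorted row to a window of at most 5 entries, a linear scan
   finishes the lookup, and on a miss the later entries are shifted up to make
   room. A standalone sketch of the search pattern, with hypothetical plain-C
   names (cols/vals are one row's column indices and values):

     PetscInt lo = 0, hi = nrow, t, k;
     while (hi - lo > 5) {
       t = (lo + hi)/2;
       if (cols[t] > col) hi = t;
       else               lo = t;
     }
     for (k = lo; k < hi; k++) {
       if (cols[k] == col) { vals[k] += value; break; }
     }
*/
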
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
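
/*
   Example (a minimal sketch): v must hold the complete row in global column
   order, matching the three copies above, i.e. the off-diagonal entries to
   the left of the diagonal block, then the diagonal-block entries, then the
   off-diagonal entries to the right.

     ierr = MatSetValuesRow(A,grow,v);CHKERRQ(ierr);

   where grow is a global row number owned by this process.
*/
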
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A                 = aij->A;
  Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa               = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B                 = aij->B;
  Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba               = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
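
/*
   Example (a minimal sketch): values destined for rows owned by other
   processes are stashed above and only communicated during assembly, so the
   usual calling pattern is

     ierr = MatSetValues(mat,1,&grow,1,&gcol,&val,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   where grow/gcol may refer to an entry owned by another process.
*/
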
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  /* make sure all processes are in the same insert mode (INSERT_VALUES or ADD_VALUES) */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
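
/*
   Example (a minimal sketch): to alternate between ADD_VALUES and
   INSERT_VALUES without finishing assembly, flush the stash between rounds;
   the insert-mode consistency check above is what this protects.

     ierr = MatAssemblyBegin(mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
*/
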
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any process has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no process disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  /* if no new nonzero locations are allowed in the matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ     *mat    = (Mat_MPIAIJ *) A->data;
  PetscInt       *owners = A->rmap->range;
  PetscInt       n       = A->rmap->n;
  PetscSF        sf;
  PetscInt       *lrows;
  PetscSFNode    *rrows;
  PetscInt       r, p = 0, len = 0;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);}
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    if (A->nooffproczerorows) {
      if (p != mat->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,mat->rank);
      lrows[len++] = idx - owners[p];
    } else {
      rrows[r].rank  = p;
      rrows[r].index = rows[r] - owners[p];
    }
  }
  if (!A->nooffproczerorows) {
    ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
    ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
    /* Collect flags for rows to be zeroed */
    ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
    ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
    /* Compress and put in row numbers */
    for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  }
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }
  /* Must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
    PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
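
/*
   Example (a minimal sketch): zeroing boundary rows of a linear system while
   keeping it consistent; rows holds global row numbers and, unless
   MAT_NO_OFF_PROC_ZERO_ROWS was set, may include rows owned by other
   processes.

     ierr = MatZeroRows(A,nrows,rows,1.0,x,b);CHKERRQ(ierr);

   This puts 1.0 on the zeroed diagonal entries and sets b[row] = 1.0*x[row].
*/
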
#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,p = 0,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off-diagonal part of matrix */
  ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off-diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  /* loop over all elements of off-process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
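
/*
   Example (a minimal sketch): the symmetric variant; unlike MatZeroRows()
   this also zeroes the corresponding columns and, given x and b, subtracts
   the eliminated column contributions from b as done above.

     ierr = MatZeroRowsColumns(A,nrows,rows,1.0,x,b);CHKERRQ(ierr);
*/
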
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually added in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
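
/*
   The multiply routines above overlap communication with computation: the
   scatter of the needed off-process entries of xx into lvec is started, the
   diagonal block is applied while messages are in flight, and the
   off-diagonal block is applied after the scatter completes. Callers use the
   generic interface (a minimal sketch):

     ierr = MatMult(A,x,y);CHKERRQ(ierr);
     ierr = MatMultAdd(A,x,y,z);CHKERRQ(ierr);

   which compute y = A*x and z = y + A*x respectively.
*/
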
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc1((N-last+first),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_Redundant"
PetscErrorCode MatDestroy_Redundant(Mat_Redundant **redundant)
{
  PetscErrorCode ierr;
  Mat_Redundant  *redund = *redundant;
  PetscInt       i;

  PetscFunctionBegin;
  *redundant = NULL;
  if (redund){
    if (redund->matseq) { /* via MatGetSubMatrices() */
      ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr);
      ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr);
      ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr);
      ierr = PetscFree(redund->matseq);CHKERRQ(ierr);
    } else {
      ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
      ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
      ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
      for (i=0; i<redund->nrecvs; i++) {
        ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
        ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
      }
      ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
    }

    if (redund->psubcomm) {
      ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr);
    }
    ierr = PetscFree(redund);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatDestroy_Redundant(&aij->redundant);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;
  FILE           *file;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much space as the process with the most nonzeros */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
    for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
    for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
  PetscFunctionReturn(0);
}
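
/*
   Example (a minimal sketch): the binary writer above is reached through the
   usual viewer interface.

     PetscViewer bviewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"A.dat",FILE_MODE_WRITE,&bviewer);CHKERRQ(ierr);
     ierr = MatView(A,bviewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&bviewer);CHKERRQ(ierr);
*/
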
#include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscBool inodes;

      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar  *a;
    const char *matname;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++;
      a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m    = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++;
      a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
      ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
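
/*
   Example (a minimal sketch): requesting the detailed per-process summary
   printed by MatView_MPIAIJ_ASCIIorDraworSocket() above.

     ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO_DETAIL);CHKERRQ(ierr);
     ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
     ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
*/
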
*/ 1382 Mat A; 1383 Mat_SeqAIJ *Aloc; 1384 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1385 MatScalar *a; 1386 const char *matname; 1387 1388 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1389 if (!rank) { 1390 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1391 } else { 1392 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1393 } 1394 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1395 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1396 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1397 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1398 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1399 1400 /* copy over the A part */ 1401 Aloc = (Mat_SeqAIJ*)aij->A->data; 1402 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1403 row = mat->rmap->rstart; 1404 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1405 for (i=0; i<m; i++) { 1406 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1407 row++; 1408 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1409 } 1410 aj = Aloc->j; 1411 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1412 1413 /* copy over the B part */ 1414 Aloc = (Mat_SeqAIJ*)aij->B->data; 1415 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1416 row = mat->rmap->rstart; 1417 ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr); 1418 ct = cols; 1419 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1420 for (i=0; i<m; i++) { 1421 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1422 row++; 1423 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1424 } 1425 ierr = PetscFree(ct);CHKERRQ(ierr); 1426 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1427 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1428 /* 1429 Everyone has to call to draw the matrix since the graphics waits are 1430 synchronized across all processors that share the PetscDraw object 1431 */ 1432 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1433 ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr); 1434 if (!rank) { 1435 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,matname);CHKERRQ(ierr); 1436 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1437 } 1438 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1439 ierr = MatDestroy(&A);CHKERRQ(ierr); 1440 } 1441 PetscFunctionReturn(0); 1442 } 1443 1444 #undef __FUNCT__ 1445 #define __FUNCT__ "MatView_MPIAIJ" 1446 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1447 { 1448 PetscErrorCode ierr; 1449 PetscBool iascii,isdraw,issocket,isbinary; 1450 1451 PetscFunctionBegin; 1452 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1453 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1454 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1455 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1456 if (iascii || isdraw || isbinary || issocket) { 1457 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1458 } 1459 PetscFunctionReturn(0); 1460 } 1461 1462 #undef __FUNCT__ 1463 #define __FUNCT__ "MatSOR_MPIAIJ" 1464 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 
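/*
   Usage sketch (illustration only): the INFO_DETAIL branch above is what prints the
   per-process nonzero statistics, e.g. via

     ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO_DETAIL);CHKERRQ(ierr);
     ierr = MatView(mat,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
     ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);

   or, from the options database, with -mat_view ::ascii_info_detail.
*/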
#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPIAIJ"
PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = 0;
  PetscBool      hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (!mat->diag) {
      ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
    } else {
      ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
    }
    ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);

    ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
    ierr = VecDestroy(&xx1);CHKERRQ(ierr);
  } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");

  ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
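/*
   Note (illustration only): MatSOR() on an MPIAIJ matrix performs only local
   (processor-block) sweeps; a truly parallel SOR is rejected above with
   PETSC_ERR_SUP. It is typically reached through the SOR preconditioner, e.g.

     ierr = PCSetType(pc,PCSOR);CHKERRQ(ierr);
     ierr = PCSORSetSymmetric(pc,SOR_LOCAL_SYMMETRIC_SWEEP);CHKERRQ(ierr);

   or -pc_type sor -pc_sor_local_symmetric on the command line.
*/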
#undef __FUNCT__
#define __FUNCT__ "MatPermute_MPIAIJ"
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  Mat            aA,aB,Aperm;
  const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
  PetscScalar    *aa,*ba;
  PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
  PetscSF        rowsf,sf;
  IS             parcolp = NULL;
  PetscBool      done;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
  ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
  ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);

  /* Invert row permutation to find out where my rows should go */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
  for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
  ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);

  /* Invert column permutation to find out where my columns should go */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
  ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

  ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
  ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);

  /* Find out where my gcols should go */
  ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
  ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

  ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
  ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
  ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    PetscInt row = rdest[i],rowner;
    ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
    for (j=ai[i]; j<ai[i+1]; j++) {
      PetscInt cowner,col = cdest[aj[j]];
      ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j=bi[i]; j<bi[i+1]; j++) {
      PetscInt cowner,col = gcdest[bj[j]];
      ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);

  ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt j0,rowlen;
    rowlen = ai[i+1] - ai[i];
    for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
      ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
    }
    rowlen = bi[i+1] - bi[i];
    for (j0=j=0; j<rowlen; j0=j) {
      for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
      ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
  ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
  ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
  ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
  ierr = PetscFree(gcdest);CHKERRQ(ierr);
  if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
  *B = Aperm;
  PetscFunctionReturn(0);
}
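/*
   Usage sketch (illustration only): given index sets rowp and colp that hold the
   new global position of each locally owned row and column (built, for example,
   with ISCreateGeneral()),

     Mat Aperm;
     ierr = MatPermute(A,rowp,colp,&Aperm);CHKERRQ(ierr);
     ...
     ierr = MatDestroy(&Aperm);CHKERRQ(ierr);
*/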
#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A    = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;

  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;

    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SPD:
    A->spd_set = PETSC_TRUE;
    A->spd     = flg;
    if (flg) {
      A->symmetric                  = PETSC_TRUE;
      A->structurally_symmetric     = PETSC_TRUE;
      A->symmetric_set              = PETSC_TRUE;
      A->structurally_symmetric_set = PETSC_TRUE;
    }
    break;
  case MAT_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_HERMITIAN:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}
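/*
   Usage sketch (illustration only): query assembled-matrix statistics and set
   options through the public interface,

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nonzeros used %g\n",info.nz_used);CHKERRQ(ierr);
     ierr = MatSetOption(A,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr);

   Setting MAT_SPD also marks the matrix symmetric and structurally symmetric,
   as implemented above.
*/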
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /* allocate enough space to hold information from the longest row */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap->n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}
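/*
   Usage sketch (illustration only): the canonical row-by-row traversal over the
   locally owned rows; note that only local rows may be requested, as enforced above.

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ... use cols[] and vals[] ...
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/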
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = mat->cmap->rstart;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm = PetscSqrtReal(*norm);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      ierr  = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr);
      ierr  = PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr);
      *norm = 0.0;
      v     = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v); v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap->n; j++) {
        v   = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}
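/*
   Usage sketch (illustration only): the norms supported above; the two norm
   (NORM_2) is rejected since it would require a singular value computation.

     PetscReal nrm;
     ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);    square root of the sum of squares
     ierr = MatNorm(A,NORM_1,&nrm);CHKERRQ(ierr);            maximum column sum
     ierr = MatNorm(A,NORM_INFINITY,&nrm);CHKERRQ(ierr);     maximum row sum
*/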
#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ     *a    = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data,*Bloc = (Mat_SeqAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
  PetscInt       cstart = A->cmap->rstart,ncol;
  Mat            B;
  MatScalar      *array;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");

  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt             *d_nnz,*g_nnz,*o_nnz;
    PetscSFNode          *oloc;
    PETSC_UNUSED PetscSF sf;

    ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
    /* compute d_nnz for preallocation */
    ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++) {
      d_nnz[aj[i]]++;
      aj[i] += cstart; /* global col index to be used by MatSetValues() */
    }
    /* compute local off-diagonal contributions */
    ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
    ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
    ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
    ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
    ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
  } else {
    B    = *matout;
    ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
  }

  /* copy over the A part */
  array = Aloc->a;
  row   = A->rmap->rstart;
  for (i=0; i<ma; i++) {
    ncol = ai[i+1]-ai[i];
    ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++;
    array += ncol; aj += ncol;
  }
  aj = Aloc->j;
  for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col indices */

  /* copy over the B part */
  ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
  array = Bloc->a;
  row   = A->rmap->rstart;
  for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++;
    array += ncol; cols_tmp += ncol;
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
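/*
   Usage sketch (illustration only):

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
     ...
     ierr = MatDestroy(&At);CHKERRQ(ierr);

   For a square matrix the transpose can be done in place with
   MatTranspose(A,MAT_REUSE_MATRIX,&A); that is the case handled via
   MatHeaderMerge() above.
*/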
#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIAIJ"
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            a    = aij->A,b = aij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPIAIJ"
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIAIJ"
PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
  Mat            a,b,c,d;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPIAIJ"
PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than MatCopy_Basic() by first uncompressing the a->B matrices
       and then copying the submatrices */
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUp_MPIAIJ"
PetscErrorCode MatSetUp_MPIAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
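/*
   Usage sketch (illustration only): scale row i by l[i] and column j by r[j],
   i.e. A <- diag(l) A diag(r); pass NULL to skip one side.

     Vec l,r;
     ierr = MatGetVecs(A,&r,&l);CHKERRQ(ierr);
     ierr = VecSet(l,2.0);CHKERRQ(ierr);
     ierr = VecSet(r,0.5);CHKERRQ(ierr);
     ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);
     ierr = VecDestroy(&l);CHKERRQ(ierr);
     ierr = VecDestroy(&r);CHKERRQ(ierr);
*/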
/*
   Computes the number of nonzeros per row needed for preallocation when X and Y
   have different nonzero structure.
*/
#undef __FUNCT__
#define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private"
PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
{
  PetscInt i,j,k,nzx,nzy;

  PetscFunctionBegin;
  /* Set the number of nonzeros in the new matrix */
  for (i=0; i<m; i++) {
    const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
    nzx    = xi[i+1] - xi[i];
    nzy    = yi[i+1] - yi[i];
    nnz[i] = 0;
    for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
      for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
      if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
      nnz[i]++;
    }
    for (; k<nzy; k++) nnz[i]++;
  }
  PetscFunctionReturn(0);
}

/* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
#undef __FUNCT__
#define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
{
  PetscErrorCode ierr;
  PetscInt       m = Y->rmap->N;
  Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
  Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;

  PetscFunctionBegin;
  ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIAIJ"
PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqAIJ     *x,*y;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    x    = (Mat_SeqAIJ*)xx->A->data;
    ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
    y    = (Mat_SeqAIJ*)yy->A->data;
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    x    = (Mat_SeqAIJ*)xx->B->data;
    y    = (Mat_SeqAIJ*)yy->B->data;
    ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  } else if (str == SUBSET_NONZERO_PATTERN) {
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
  } else {
    Mat      B;
    PetscInt *nnz_d,*nnz_o;
    ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
    ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
    ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
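/*
   Usage sketch (illustration only): Y <- a*X + Y. The MatStructure flag selects
   the path above: SAME_NONZERO_PATTERN uses a raw BLAS axpy on the stored values,
   SUBSET_NONZERO_PATTERN inserts X into Y, and DIFFERENT_NONZERO_PATTERN builds a
   freshly preallocated sum.

     ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
*/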
extern PetscErrorCode MatConjugate_SeqAIJ(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatConjugate_MPIAIJ"
PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
{
#if defined(PETSC_USE_COMPLEX)
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
  ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
#else
  PetscFunctionBegin;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPIAIJ"
PetscErrorCode MatRealPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPIAIJ"
PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
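/*
   Usage sketch (illustration only, complex builds): the routines above simply
   apply the corresponding operation to both the diagonal (A) and off-diagonal (B)
   blocks.

     ierr = MatConjugate(A);CHKERRQ(ierr);      conjugate every stored entry
     ierr = MatRealPart(A);CHKERRQ(ierr);       keep only the real parts
*/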
#if defined(PETSC_HAVE_PBGL)

#include <boost/parallel/mpi/bsp_process_group.hpp>
#include <boost/graph/distributed/ilu_default_graph.hpp>
#include <boost/graph/distributed/ilu_0_block.hpp>
#include <boost/graph/distributed/ilu_preconditioner.hpp>
#include <boost/graph/distributed/petsc/interface.hpp>
#include <boost/multi_array.hpp>
#include <boost/parallel/distributed_property_map.hpp>

#undef __FUNCT__
#define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
/*
  This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
*/
PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
{
  namespace petsc = boost::distributed::petsc;

  namespace graph_dist = boost::graph::distributed;
  using boost::graph::distributed::ilu_default::process_group_type;
  using boost::graph::ilu_permuted;

  PetscBool      row_identity, col_identity;
  PetscContainer c;
  PetscInt       m, n, M, N;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
  ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
  ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
  if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");

  process_group_type pg;
  typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
  lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
  lgraph_type& level_graph = *lgraph_p;
  graph_dist::ilu_default::graph_type& graph(level_graph.graph);

  petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
  ilu_permuted(level_graph);

  /* put together the new matrix */
  ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
  ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
  ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr);
  ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSolve_MPIAIJ"
/*
  This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
*/
PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
{
  namespace graph_dist = boost::graph::distributed;

  typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
  lgraph_type    *lgraph_p;
  PetscContainer c;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
  ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
  ierr = VecCopy(b, x);CHKERRQ(ierr);

  PetscScalar *array_x;
  ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
  PetscInt sx;
  ierr = VecGetSize(x, &sx);CHKERRQ(ierr);

  PetscScalar *array_b;
  ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
  PetscInt sb;
  ierr = VecGetSize(b, &sb);CHKERRQ(ierr);

  lgraph_type& level_graph = *lgraph_p;
  graph_dist::ilu_default::graph_type& graph(level_graph.graph);

  typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
  array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]);
  array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]);

  typedef boost::iterator_property_map<array_ref_type::iterator,
                                       boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type;
  gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph));
  gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph));

  ilu_set_solve(*lgraph_p, vector_b, vector_x);
  PetscFunctionReturn(0);
}
#endif


#undef __FUNCT__
#define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced"
PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
{
  PetscMPIInt    rank,size;
  MPI_Comm       comm;
  PetscErrorCode ierr;
  PetscInt       nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N;
  PetscMPIInt    *send_rank=NULL,*recv_rank=NULL,subrank,subsize;
  PetscInt       *rowrange = mat->rmap->range;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            A = aij->A,B=aij->B,C=*matredundant;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
  PetscScalar    *sbuf_a;
  PetscInt       nzlocal=a->nz+b->nz;
  PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
  PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray;
  PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
  MatScalar      *aworkA,*aworkB;
  PetscScalar    *vals;
  PetscMPIInt    tag1,tag2,tag3,imdex;
  MPI_Request    *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL;
  MPI_Request    *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL;
  MPI_Status     recv_status,*send_status;
  PetscInt       *sbuf_nz=NULL,*rbuf_nz=NULL,count;
  PetscInt       **rbuf_j=NULL;
  PetscScalar    **rbuf_a=NULL;
  Mat_Redundant  *redund=NULL;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);

  if (reuse == MAT_REUSE_MATRIX) {
    if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
    if (subsize == 1) {
      Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
      redund = c->redundant;
    } else {
      Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
      redund = c->redundant;
    }
    if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");

    nsends    = redund->nsends;
    nrecvs    = redund->nrecvs;
    send_rank = redund->send_rank;
    recv_rank = redund->recv_rank;
    sbuf_nz   = redund->sbuf_nz;
    rbuf_nz   = redund->rbuf_nz;
    sbuf_j    = redund->sbuf_j;
    sbuf_a    = redund->sbuf_a;
    rbuf_j    = redund->rbuf_j;
    rbuf_a    = redund->rbuf_a;
  }

  if (reuse == MAT_INITIAL_MATRIX) {
    PetscInt nleftover,np_subcomm;

    /* determine the destination ranks (send_rank), nsends and nrecvs */
    ierr = PetscMalloc2(size,&send_rank,size,&recv_rank);CHKERRQ(ierr);

    np_subcomm = size/nsubcomm;
    nleftover  = size - nsubcomm*np_subcomm;

    /* the block of code below is specific to INTERLACED */
    /* ------------------------------------------------- */
    nsends = 0; nrecvs = 0;
    for (i=0; i<size; i++) {
      if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */
        send_rank[nsends++] = i;
        recv_rank[nrecvs++] = i;
      }
    }
    if (rank >= size - nleftover) { /* this proc is a leftover processor */
      i = size-nleftover-1;
      j = 0;
      while (j < nsubcomm - nleftover) {
        send_rank[nsends++] = i;
        i--; j++;
      }
    }

    if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc receives from the leftover processors */
      for (i=0; i<nleftover; i++) {
        recv_rank[nrecvs++] = size-nleftover+i;
      }
    }
    /*----------------------------------------------------*/

    /* allocate sbuf_j, sbuf_a */
    i    = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
    ierr = PetscMalloc1(i,&sbuf_j);CHKERRQ(ierr);
    ierr = PetscMalloc1((nzlocal+1),&sbuf_a);CHKERRQ(ierr);
    /*
    ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr);
    ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr);
    */
  } /* end of if (reuse == MAT_INITIAL_MATRIX) */

  /* copy mat's local entries into the buffers */
  if (reuse == MAT_INITIAL_MATRIX) {
    rownz_max = 0;
    rptr      = sbuf_j;
    cols      = sbuf_j + rend-rstart + 1;
    vals      = sbuf_a;
    rptr[0]   = 0;
    for (i=0; i<rend-rstart; i++) {
      row    = i + rstart;
      nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
      ncols  = nzA + nzB;
      cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
      aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
      /* load the column indices for this row into cols */
      lwrite = 0;
      for (l=0; l<nzB; l++) {
        if ((ctmp = bmap[cworkB[l]]) < cstart) {
          vals[lwrite]   = aworkB[l];
          cols[lwrite++] = ctmp;
        }
      }
      for (l=0; l<nzA; l++) {
        vals[lwrite]   = aworkA[l];
        cols[lwrite++] = cstart + cworkA[l];
      }
      for (l=0; l<nzB; l++) {
        if ((ctmp = bmap[cworkB[l]]) >= cend) {
          vals[lwrite]   = aworkB[l];
          cols[lwrite++] = ctmp;
        }
      }
      vals += ncols;
      cols += ncols;
      rptr[i+1] = rptr[i] + ncols;
      if (rownz_max < ncols) rownz_max = ncols;
    }
    if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%D] %D != %D + %D",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
  } else { /* only copy matrix values into sbuf_a */
    rptr    = sbuf_j;
    vals    = sbuf_a;
    rptr[0] = 0;
    for (i=0; i<rend-rstart; i++) {
      row    = i + rstart;
      nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
      ncols  = nzA + nzB;
      cworkB = b->j + b->i[i];
      aworkA = a->a + a->i[i];
      aworkB = b->a + b->i[i];
      lwrite = 0;
      for (l=0; l<nzB; l++) {
        if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
      }
      for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
      for (l=0; l<nzB; l++) {
        if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
      }
      vals += ncols;
      rptr[i+1] = rptr[i] + ncols;
    }
  } /* end of if (reuse == MAT_INITIAL_MATRIX) */
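  /*
     The exchange below proceeds in three phases, each with its own tag:
     (1) tag1: exchange the nonzero counts (nzlocal) so receive buffers can be
         sized; (2) tag2: exchange the row pointers and column indices (sbuf_j);
     (3) tag3: exchange the numerical values (sbuf_a). On MAT_REUSE_MATRIX only
     phase (3) is repeated, since the nonzero structure is cached in Mat_Redundant.
  */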
  /* send nzlocal to others, and receive others' nzlocal */
  /*-----------------------------------------------------*/
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);

    s_waits2 = s_waits3 + nsends;
    s_waits1 = s_waits2 + nsends;
    r_waits1 = s_waits1 + nsends;
    r_waits2 = r_waits1 + nrecvs;
    r_waits3 = r_waits2 + nrecvs;
  } else {
    ierr = PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);

    r_waits3 = s_waits3 + nsends;
  }

  ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX) {
    /* get new tags to keep the communication clean */
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
    ierr = PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);CHKERRQ(ierr);

    /* post receives of others' nzlocal */
    for (i=0; i<nrecvs; i++) {
      ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
    }
    /* send nzlocal to others */
    for (i=0; i<nsends; i++) {
      sbuf_nz[i] = nzlocal;
      ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
    }
    /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
    count = nrecvs;
    while (count) {
      ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);

      recv_rank[imdex] = recv_status.MPI_SOURCE;
      /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
      ierr = PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);CHKERRQ(ierr);

      i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */

      rbuf_nz[imdex] += i + 2;

      ierr = PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);CHKERRQ(ierr);
      ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
      count--;
    }
    /* wait on sends of nzlocal */
    if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
    /* send mat->i,j to others, and receive theirs */
    /*---------------------------------------------*/
    for (i=0; i<nsends; i++) {
      j    = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
      ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
    }
    /* wait on receives of mat->i,j */
    /*------------------------------*/
    count = nrecvs;
    while (count) {
      ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
      if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
      count--;
    }
    /* wait on sends of mat->i,j */
    /*---------------------------*/
    if (nsends) {
      ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
    }
  } /* end of if (reuse == MAT_INITIAL_MATRIX) */

  /* post receives, send and receive mat->a */
  /*----------------------------------------*/
  for (imdex=0; imdex<nrecvs; imdex++) {
    ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
  }
  for (i=0; i<nsends; i++) {
    ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
  }
  count = nrecvs;
  while (count) {
    ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
    if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
    count--;
  }
  if (nsends) {
    ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
  }

  ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);

  /* create redundant matrix */
  /*-------------------------*/
  if (reuse == MAT_INITIAL_MATRIX) {
    const PetscInt *range;
    PetscInt       rstart_sub,rend_sub,mloc_sub;

    /* compute rownz_max for preallocation */
    for (imdex=0; imdex<nrecvs; imdex++) {
      j    = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
      rptr = rbuf_j[imdex];
      for (i=0; i<j; i++) {
        ncols = rptr[i+1] - rptr[i];
        if (rownz_max < ncols) rownz_max = ncols;
      }
    }

    ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);

    /* get the local size of the redundant matrix:
       mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED; it works for the other types, but may not be efficient */
    ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr);
    rstart_sub = range[nsubcomm*subrank];
    if (subrank+1 < subsize) { /* not the last proc in subcomm */
      rend_sub = range[nsubcomm*(subrank+1)];
    } else {
      rend_sub = mat->rmap->N;
    }
    mloc_sub = rend_sub - rstart_sub;

    if (M == N) {
      ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
    } else { /* non-square matrix */
      ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr);
    }
    ierr = MatSetBlockSizesFromMats(C,mat,mat);CHKERRQ(ierr);
    ierr = MatSetFromOptions(C);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr);
  } else {
    C = *matredundant;
  }

  /* insert local matrix entries */
  rptr = sbuf_j;
  cols = sbuf_j + rend-rstart + 1;
  vals = sbuf_a;
  for (i=0; i<rend-rstart; i++) {
    row   = i + rstart;
    ncols = rptr[i+1] - rptr[i];
    ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
    vals += ncols;
    cols += ncols;
  }
  /* insert received matrix entries */
  for (imdex=0; imdex<nrecvs; imdex++) {
    rstart = rowrange[recv_rank[imdex]];
    rend   = rowrange[recv_rank[imdex]+1];
    /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */
    rptr = rbuf_j[imdex];
    cols = rbuf_j[imdex] + rend-rstart + 1;
    vals = rbuf_a[imdex];
    for (i=0; i<rend-rstart; i++) {
      row   = i + rstart;
      ncols = rptr[i+1] - rptr[i];
      ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      vals += ncols;
      cols += ncols;
    }
  }
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (reuse == MAT_INITIAL_MATRIX) {
    *matredundant = C;

    /* create a supporting struct and attach it to C for reuse */
    ierr = PetscNewLog(C,&redund);CHKERRQ(ierr);
    if (subsize == 1) {
      Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
      c->redundant = redund;
    } else {
      Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
      c->redundant = redund;
    }

    redund->nzlocal   = nzlocal;
    redund->nsends    = nsends;
    redund->nrecvs    = nrecvs;
    redund->send_rank = send_rank;
    redund->recv_rank = recv_rank;
    redund->sbuf_nz   = sbuf_nz;
    redund->rbuf_nz   = rbuf_nz;
    redund->sbuf_j    = sbuf_j;
    redund->sbuf_a    = sbuf_a;
    redund->rbuf_j    = rbuf_j;
    redund->rbuf_a    = rbuf_a;
    redund->psubcomm  = NULL;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
{
  PetscErrorCode ierr;
  MPI_Comm       comm;
  PetscMPIInt    size,subsize;
  PetscInt       mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N;
  Mat_Redundant  *redund=NULL;
  PetscSubcomm   psubcomm=NULL;
  MPI_Comm       subcomm_in=subcomm;
  Mat            *matseq;
  IS             isrow,iscol;

  PetscFunctionBegin;
  if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */
    if (reuse == MAT_INITIAL_MATRIX) {
      /* create psubcomm, then get subcomm */
      ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
      ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
      if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must be between 1 and %D",size);

      ierr    = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr);
      ierr    = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr);
      ierr    = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr);
      ierr    = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr);
      subcomm = psubcomm->comm;
    } else { /* retrieve psubcomm and subcomm */
      ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr);
      ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
      if (subsize == 1) {
        Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
        redund = c->redundant;
      } else {
        Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
        redund = c->redundant;
      }
      psubcomm = redund->psubcomm;
    }
    if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) {
      ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr);
      if (reuse == MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_Redundant() */
        ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr);
        if (subsize == 1) {
          Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
          c->redundant->psubcomm = psubcomm;
        } else {
          Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
          c->redundant->psubcomm = psubcomm;
        }
      }
      PetscFunctionReturn(0);
    }
  }

  /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */
  ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX) {
    /* create a local sequential matrix matseq[0] */
    mloc_sub = PETSC_DECIDE;
    ierr     = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr);
    ierr     = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr);
    rstart   = rend - mloc_sub;
    ierr     = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr);
    ierr     = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr);
  } else { /* reuse == MAT_REUSE_MATRIX */
    if (subsize == 1) {
      Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
      redund = c->redundant;
    } else {
      Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
      redund = c->redundant;
    }

    isrow  = redund->isrow;
    iscol  = redund->iscol;
    matseq = redund->matseq;
  }
  ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr);
  ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr);

  if (reuse == MAT_INITIAL_MATRIX) {
    /* create a supporting struct and attach it to C for reuse */
    ierr = PetscNewLog(*matredundant,&redund);CHKERRQ(ierr);
    if (subsize == 1) {
      Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
      c->redundant = redund;
    } else {
      Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
      c->redundant = redund;
    }
    redund->isrow    = isrow;
    redund->iscol    = iscol;
    redund->matseq   = matseq;
    redund->psubcomm = psubcomm;
  }
  PetscFunctionReturn(0);
}
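/*
   Usage sketch (illustration only; nsub is a placeholder count): build a full
   copy of the matrix on each of nsub subcommunicators, as PCREDUNDANT does
   internally,

     Mat Ared;
     ierr = MatGetRedundantMatrix(A,nsub,MPI_COMM_NULL,MAT_INITIAL_MATRIX,&Ared);CHKERRQ(ierr);
     ...
     ierr = MatGetRedundantMatrix(A,nsub,MPI_COMM_NULL,MAT_REUSE_MATRIX,&Ared);CHKERRQ(ierr);
     ierr = MatDestroy(&Ared);CHKERRQ(ierr);

   Passing MPI_COMM_NULL lets the routine split the matrix's communicator itself
   via PetscSubcomm, as in the branch above.
*/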
PetscFunctionBegin;
2839 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2840 ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2841 if (idx) {
2842 for (i=0; i<A->rmap->n; i++) {
2843 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2844 }
2845 }
2846
2847 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2848 if (idx) {
2849 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2850 }
2851 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2852 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2853
2854 for (i=0; i<A->rmap->n; i++) {
2855 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2856 va[i] = vb[i];
2857 if (idx) idx[i] = a->garray[idxb[i]];
2858 }
2859 }
2860
2861 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2862 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2863 ierr = PetscFree(idxb);CHKERRQ(ierr);
2864 ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2865 PetscFunctionReturn(0);
2866 }
2867
2868 #undef __FUNCT__
2869 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2870 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2871 {
2872 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2873 PetscErrorCode ierr;
2874 PetscInt i,*idxb = 0;
2875 PetscScalar *va,*vb;
2876 Vec vtmp;
2877
2878 PetscFunctionBegin;
2879 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2880 ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2881 if (idx) {
2882 for (i=0; i<A->rmap->n; i++) { /* loop over the local rows, not the local columns */
2883 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2884 }
2885 }
2886
2887 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2888 if (idx) {
2889 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2890 }
2891 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2892 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2893
2894 for (i=0; i<A->rmap->n; i++) {
2895 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2896 va[i] = vb[i];
2897 if (idx) idx[i] = a->garray[idxb[i]];
2898 }
2899 }
2900
2901 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2902 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2903 ierr = PetscFree(idxb);CHKERRQ(ierr);
2904 ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2905 PetscFunctionReturn(0);
2906 }
2907
2908 #undef __FUNCT__
2909 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2910 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2911 {
2912 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data;
2913 PetscInt n = A->rmap->n;
2914 PetscInt cstart = A->cmap->rstart;
2915 PetscInt *cmap = mat->garray;
2916 PetscInt *diagIdx, *offdiagIdx;
2917 Vec diagV, offdiagV;
2918 PetscScalar *a, *diagA, *offdiagA;
2919 PetscInt r;
2920 PetscErrorCode ierr;
2921
2922 PetscFunctionBegin;
2923 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2924 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2925 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2926 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2927 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2928 ierr = VecGetArray(v, &a);CHKERRQ(ierr);
2929 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
2930 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2931 for (r = 0; r < n; ++r) {
2932 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2933 a[r] = diagA[r];
2934 idx[r] = cstart + diagIdx[r];
2935 } else {
2936 a[r] = offdiagA[r];
2937 idx[r] = cmap[offdiagIdx[r]];
2938 }
2939 }
2940 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
2941 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
2942 ierr = VecRestoreArray(offdiagV,
&offdiagA);CHKERRQ(ierr); 2943 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2944 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2945 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2946 PetscFunctionReturn(0); 2947 } 2948 2949 #undef __FUNCT__ 2950 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2951 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2952 { 2953 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2954 PetscInt n = A->rmap->n; 2955 PetscInt cstart = A->cmap->rstart; 2956 PetscInt *cmap = mat->garray; 2957 PetscInt *diagIdx, *offdiagIdx; 2958 Vec diagV, offdiagV; 2959 PetscScalar *a, *diagA, *offdiagA; 2960 PetscInt r; 2961 PetscErrorCode ierr; 2962 2963 PetscFunctionBegin; 2964 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2965 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2966 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2967 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2968 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2969 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2970 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2971 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2972 for (r = 0; r < n; ++r) { 2973 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2974 a[r] = diagA[r]; 2975 idx[r] = cstart + diagIdx[r]; 2976 } else { 2977 a[r] = offdiagA[r]; 2978 idx[r] = cmap[offdiagIdx[r]]; 2979 } 2980 } 2981 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2982 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2983 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2984 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2985 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2986 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2987 PetscFunctionReturn(0); 2988 } 2989 2990 #undef __FUNCT__ 2991 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 2992 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2993 { 2994 PetscErrorCode ierr; 2995 Mat *dummy; 2996 2997 PetscFunctionBegin; 2998 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2999 *newmat = *dummy; 3000 ierr = PetscFree(dummy);CHKERRQ(ierr); 3001 PetscFunctionReturn(0); 3002 } 3003 3004 #undef __FUNCT__ 3005 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3006 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3007 { 3008 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3009 PetscErrorCode ierr; 3010 3011 PetscFunctionBegin; 3012 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3013 PetscFunctionReturn(0); 3014 } 3015 3016 #undef __FUNCT__ 3017 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3018 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3019 { 3020 PetscErrorCode ierr; 3021 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3022 3023 PetscFunctionBegin; 3024 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3025 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3026 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3027 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3028 PetscFunctionReturn(0); 3029 } 3030 3031 /* -------------------------------------------------------------------*/ 3032 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3033 MatGetRow_MPIAIJ, 3034 MatRestoreRow_MPIAIJ, 3035 MatMult_MPIAIJ, 3036 /* 4*/ MatMultAdd_MPIAIJ, 3037 MatMultTranspose_MPIAIJ, 3038 MatMultTransposeAdd_MPIAIJ, 3039 #if defined(PETSC_HAVE_PBGL) 3040 MatSolve_MPIAIJ, 3041 #else 3042 0, 3043 #endif 3044 0, 3045 
0, 3046 /*10*/ 0, 3047 0, 3048 0, 3049 MatSOR_MPIAIJ, 3050 MatTranspose_MPIAIJ, 3051 /*15*/ MatGetInfo_MPIAIJ, 3052 MatEqual_MPIAIJ, 3053 MatGetDiagonal_MPIAIJ, 3054 MatDiagonalScale_MPIAIJ, 3055 MatNorm_MPIAIJ, 3056 /*20*/ MatAssemblyBegin_MPIAIJ, 3057 MatAssemblyEnd_MPIAIJ, 3058 MatSetOption_MPIAIJ, 3059 MatZeroEntries_MPIAIJ, 3060 /*24*/ MatZeroRows_MPIAIJ, 3061 0, 3062 #if defined(PETSC_HAVE_PBGL) 3063 0, 3064 #else 3065 0, 3066 #endif 3067 0, 3068 0, 3069 /*29*/ MatSetUp_MPIAIJ, 3070 #if defined(PETSC_HAVE_PBGL) 3071 0, 3072 #else 3073 0, 3074 #endif 3075 0, 3076 0, 3077 0, 3078 /*34*/ MatDuplicate_MPIAIJ, 3079 0, 3080 0, 3081 0, 3082 0, 3083 /*39*/ MatAXPY_MPIAIJ, 3084 MatGetSubMatrices_MPIAIJ, 3085 MatIncreaseOverlap_MPIAIJ, 3086 MatGetValues_MPIAIJ, 3087 MatCopy_MPIAIJ, 3088 /*44*/ MatGetRowMax_MPIAIJ, 3089 MatScale_MPIAIJ, 3090 0, 3091 0, 3092 MatZeroRowsColumns_MPIAIJ, 3093 /*49*/ MatSetRandom_MPIAIJ, 3094 0, 3095 0, 3096 0, 3097 0, 3098 /*54*/ MatFDColoringCreate_MPIXAIJ, 3099 0, 3100 MatSetUnfactored_MPIAIJ, 3101 MatPermute_MPIAIJ, 3102 0, 3103 /*59*/ MatGetSubMatrix_MPIAIJ, 3104 MatDestroy_MPIAIJ, 3105 MatView_MPIAIJ, 3106 0, 3107 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 3108 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 3109 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 3110 0, 3111 0, 3112 0, 3113 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3114 MatGetRowMinAbs_MPIAIJ, 3115 0, 3116 MatSetColoring_MPIAIJ, 3117 0, 3118 MatSetValuesAdifor_MPIAIJ, 3119 /*75*/ MatFDColoringApply_AIJ, 3120 0, 3121 0, 3122 0, 3123 MatFindZeroDiagonals_MPIAIJ, 3124 /*80*/ 0, 3125 0, 3126 0, 3127 /*83*/ MatLoad_MPIAIJ, 3128 0, 3129 0, 3130 0, 3131 0, 3132 0, 3133 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3134 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3135 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3136 MatPtAP_MPIAIJ_MPIAIJ, 3137 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3138 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 3139 0, 3140 0, 3141 0, 3142 0, 3143 /*99*/ 0, 3144 0, 3145 0, 3146 MatConjugate_MPIAIJ, 3147 0, 3148 /*104*/MatSetValuesRow_MPIAIJ, 3149 MatRealPart_MPIAIJ, 3150 MatImaginaryPart_MPIAIJ, 3151 0, 3152 0, 3153 /*109*/0, 3154 MatGetRedundantMatrix_MPIAIJ, 3155 MatGetRowMin_MPIAIJ, 3156 0, 3157 0, 3158 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3159 0, 3160 0, 3161 0, 3162 0, 3163 /*119*/0, 3164 0, 3165 0, 3166 0, 3167 MatGetMultiProcBlock_MPIAIJ, 3168 /*124*/MatFindNonzeroRows_MPIAIJ, 3169 MatGetColumnNorms_MPIAIJ, 3170 MatInvertBlockDiagonal_MPIAIJ, 3171 0, 3172 MatGetSubMatricesParallel_MPIAIJ, 3173 /*129*/0, 3174 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3175 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3176 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3177 0, 3178 /*134*/0, 3179 0, 3180 0, 3181 0, 3182 0, 3183 /*139*/0, 3184 0, 3185 0, 3186 MatFDColoringSetUp_MPIXAIJ 3187 }; 3188 3189 /* ----------------------------------------------------------------------------------------*/ 3190 3191 #undef __FUNCT__ 3192 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3193 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3194 { 3195 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3196 PetscErrorCode ierr; 3197 3198 PetscFunctionBegin; 3199 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3200 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3201 PetscFunctionReturn(0); 3202 } 3203 3204 #undef __FUNCT__ 3205 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3206 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3207 { 3208 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3209 PetscErrorCode ierr; 3210 3211 PetscFunctionBegin; 3212 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3213 ierr = 
MatRetrieveValues(aij->B);CHKERRQ(ierr); 3214 PetscFunctionReturn(0); 3215 } 3216 3217 #undef __FUNCT__ 3218 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3219 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3220 { 3221 Mat_MPIAIJ *b; 3222 PetscErrorCode ierr; 3223 3224 PetscFunctionBegin; 3225 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3226 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3227 b = (Mat_MPIAIJ*)B->data; 3228 3229 if (!B->preallocated) { 3230 /* Explicitly create 2 MATSEQAIJ matrices. */ 3231 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3232 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3233 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 3234 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3235 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 3236 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3237 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3238 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 3239 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3240 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 3241 } 3242 3243 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3244 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3245 B->preallocated = PETSC_TRUE; 3246 PetscFunctionReturn(0); 3247 } 3248 3249 #undef __FUNCT__ 3250 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3251 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3252 { 3253 Mat mat; 3254 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3255 PetscErrorCode ierr; 3256 3257 PetscFunctionBegin; 3258 *newmat = 0; 3259 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3260 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3261 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 3262 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3263 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3264 a = (Mat_MPIAIJ*)mat->data; 3265 3266 mat->factortype = matin->factortype; 3267 mat->assembled = PETSC_TRUE; 3268 mat->insertmode = NOT_SET_VALUES; 3269 mat->preallocated = PETSC_TRUE; 3270 3271 a->size = oldmat->size; 3272 a->rank = oldmat->rank; 3273 a->donotstash = oldmat->donotstash; 3274 a->roworiented = oldmat->roworiented; 3275 a->rowindices = 0; 3276 a->rowvalues = 0; 3277 a->getrowactive = PETSC_FALSE; 3278 3279 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3280 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3281 3282 if (oldmat->colmap) { 3283 #if defined(PETSC_USE_CTABLE) 3284 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3285 #else 3286 ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr); 3287 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3288 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3289 #endif 3290 } else a->colmap = 0; 3291 if (oldmat->garray) { 3292 PetscInt len; 3293 len = oldmat->B->cmap->n; 3294 ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr); 3295 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3296 if (len) { ierr = 
PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3297 } else a->garray = 0;
3298
3299 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3300 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3301 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3302 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3303 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3304 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3305 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3306 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3307 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3308 *newmat = mat;
3309 PetscFunctionReturn(0);
3310 }
3311
3312
3313
3314 #undef __FUNCT__
3315 #define __FUNCT__ "MatLoad_MPIAIJ"
3316 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3317 {
3318 PetscScalar *vals,*svals;
3319 MPI_Comm comm;
3320 PetscErrorCode ierr;
3321 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag;
3322 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3323 PetscInt header[4],*rowlengths = 0,M,N,m,*cols;
3324 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
3325 PetscInt cend,cstart,n,*rowners,sizesset=1;
3326 int fd;
3327 PetscInt bs = newMat->rmap->bs;
3328
3329 PetscFunctionBegin;
3330 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3331 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3332 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3333 if (!rank) {
3334 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3335 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3336 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
3337 }
3338
3339 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
3340 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3341 ierr = PetscOptionsEnd();CHKERRQ(ierr);
3342 if (bs < 0) bs = 1;
3343
3344 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3345
3346 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3347 M = header[1]; N = header[2];
3348 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3349 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3350 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3351
3352 /* If global sizes are set, check if they are consistent with that given in the file */
3353 if (sizesset) {
3354 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3355 }
3356 if (sizesset && M != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
3357 if (sizesset && N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);
3358
3359 /* determine ownership of all (block) rows */
3360 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%D) and block size (%D)",M,bs);
3361 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */
3362 else m =
newMat->rmap->n; /* Set by user */ 3363 3364 ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr); 3365 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3366 3367 /* First process needs enough room for process with most rows */ 3368 if (!rank) { 3369 mmax = rowners[1]; 3370 for (i=2; i<=size; i++) { 3371 mmax = PetscMax(mmax, rowners[i]); 3372 } 3373 } else mmax = -1; /* unused, but compilers complain */ 3374 3375 rowners[0] = 0; 3376 for (i=2; i<=size; i++) { 3377 rowners[i] += rowners[i-1]; 3378 } 3379 rstart = rowners[rank]; 3380 rend = rowners[rank+1]; 3381 3382 /* distribute row lengths to all processors */ 3383 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 3384 if (!rank) { 3385 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3386 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 3387 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 3388 for (j=0; j<m; j++) { 3389 procsnz[0] += ourlens[j]; 3390 } 3391 for (i=1; i<size; i++) { 3392 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3393 /* calculate the number of nonzeros on each processor */ 3394 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3395 procsnz[i] += rowlengths[j]; 3396 } 3397 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3398 } 3399 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3400 } else { 3401 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3402 } 3403 3404 if (!rank) { 3405 /* determine max buffer needed and allocate it */ 3406 maxnz = 0; 3407 for (i=0; i<size; i++) { 3408 maxnz = PetscMax(maxnz,procsnz[i]); 3409 } 3410 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 3411 3412 /* read in my part of the matrix column indices */ 3413 nz = procsnz[0]; 3414 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3415 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3416 3417 /* read in every one elses and ship off */ 3418 for (i=1; i<size; i++) { 3419 nz = procsnz[i]; 3420 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3421 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3422 } 3423 ierr = PetscFree(cols);CHKERRQ(ierr); 3424 } else { 3425 /* determine buffer space needed for message */ 3426 nz = 0; 3427 for (i=0; i<m; i++) { 3428 nz += ourlens[i]; 3429 } 3430 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3431 3432 /* receive message of column indices*/ 3433 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3434 } 3435 3436 /* determine column ownership if matrix is not square */ 3437 if (N != M) { 3438 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3439 else n = newMat->cmap->n; 3440 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3441 cstart = cend - n; 3442 } else { 3443 cstart = rstart; 3444 cend = rend; 3445 n = cend - cstart; 3446 } 3447 3448 /* loop over local rows, determining number of off diagonal entries */ 3449 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3450 jj = 0; 3451 for (i=0; i<m; i++) { 3452 for (j=0; j<ourlens[i]; j++) { 3453 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3454 jj++; 3455 } 3456 } 3457 3458 for (i=0; i<m; i++) { 3459 ourlens[i] -= offlens[i]; 3460 } 3461 if (!sizesset) { 3462 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3463 } 3464 3465 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3466 3467 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3468 3469 for (i=0; i<m; i++) { 3470 
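/* restore the full row lengths (diagonal plus off-diagonal counts); they are used below
   to step through mycols and the incoming values when inserting the rows into the matrix */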
ourlens[i] += offlens[i]; 3471 } 3472 3473 if (!rank) { 3474 ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr); 3475 3476 /* read in my part of the matrix numerical values */ 3477 nz = procsnz[0]; 3478 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3479 3480 /* insert into matrix */ 3481 jj = rstart; 3482 smycols = mycols; 3483 svals = vals; 3484 for (i=0; i<m; i++) { 3485 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3486 smycols += ourlens[i]; 3487 svals += ourlens[i]; 3488 jj++; 3489 } 3490 3491 /* read in other processors and ship out */ 3492 for (i=1; i<size; i++) { 3493 nz = procsnz[i]; 3494 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3495 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3496 } 3497 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3498 } else { 3499 /* receive numeric values */ 3500 ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr); 3501 3502 /* receive message of values*/ 3503 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3504 3505 /* insert into matrix */ 3506 jj = rstart; 3507 smycols = mycols; 3508 svals = vals; 3509 for (i=0; i<m; i++) { 3510 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3511 smycols += ourlens[i]; 3512 svals += ourlens[i]; 3513 jj++; 3514 } 3515 } 3516 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3517 ierr = PetscFree(vals);CHKERRQ(ierr); 3518 ierr = PetscFree(mycols);CHKERRQ(ierr); 3519 ierr = PetscFree(rowners);CHKERRQ(ierr); 3520 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3521 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3522 PetscFunctionReturn(0); 3523 } 3524 3525 #undef __FUNCT__ 3526 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3527 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3528 { 3529 PetscErrorCode ierr; 3530 IS iscol_local; 3531 PetscInt csize; 3532 3533 PetscFunctionBegin; 3534 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3535 if (call == MAT_REUSE_MATRIX) { 3536 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3537 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3538 } else { 3539 PetscInt cbs; 3540 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3541 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3542 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3543 } 3544 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3545 if (call == MAT_INITIAL_MATRIX) { 3546 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3547 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3548 } 3549 PetscFunctionReturn(0); 3550 } 3551 3552 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3553 #undef __FUNCT__ 3554 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3555 /* 3556 Not great since it makes two copies of the submatrix, first an SeqAIJ 3557 in local and then by concatenating the local matrices the end result. 3558 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3559 3560 Note: This requires a sequential iscol with all indices. 
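The flow below is: gather the requested rows into a sequential submatrix on each process
      with MatGetSubMatrices_MPIAIJ_Local(), then scatter those entries into the final
      parallel matrix with MatSetValues_MPIAIJ().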
3561 */ 3562 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3563 { 3564 PetscErrorCode ierr; 3565 PetscMPIInt rank,size; 3566 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3567 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3568 PetscBool allcolumns, colflag; 3569 Mat M,Mreuse; 3570 MatScalar *vwork,*aa; 3571 MPI_Comm comm; 3572 Mat_SeqAIJ *aij; 3573 3574 PetscFunctionBegin; 3575 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3576 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3577 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3578 3579 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3580 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3581 if (colflag && ncol == mat->cmap->N) { 3582 allcolumns = PETSC_TRUE; 3583 } else { 3584 allcolumns = PETSC_FALSE; 3585 } 3586 if (call == MAT_REUSE_MATRIX) { 3587 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3588 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3589 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3590 } else { 3591 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3592 } 3593 3594 /* 3595 m - number of local rows 3596 n - number of columns (same on all processors) 3597 rstart - first row in new global matrix generated 3598 */ 3599 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3600 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3601 if (call == MAT_INITIAL_MATRIX) { 3602 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3603 ii = aij->i; 3604 jj = aij->j; 3605 3606 /* 3607 Determine the number of non-zeros in the diagonal and off-diagonal 3608 portions of the matrix in order to do correct preallocation 3609 */ 3610 3611 /* first get start and end of "diagonal" columns */ 3612 if (csize == PETSC_DECIDE) { 3613 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3614 if (mglobal == n) { /* square matrix */ 3615 nlocal = m; 3616 } else { 3617 nlocal = n/size + ((n % size) > rank); 3618 } 3619 } else { 3620 nlocal = csize; 3621 } 3622 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3623 rstart = rend - nlocal; 3624 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3625 3626 /* next, compute all the lengths */ 3627 ierr = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr); 3628 olens = dlens + m; 3629 for (i=0; i<m; i++) { 3630 jend = ii[i+1] - ii[i]; 3631 olen = 0; 3632 dlen = 0; 3633 for (j=0; j<jend; j++) { 3634 if (*jj < rstart || *jj >= rend) olen++; 3635 else dlen++; 3636 jj++; 3637 } 3638 olens[i] = olen; 3639 dlens[i] = dlen; 3640 } 3641 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3642 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3643 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3644 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3645 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3646 ierr = PetscFree(dlens);CHKERRQ(ierr); 3647 } else { 3648 PetscInt ml,nl; 3649 3650 M = *newmat; 3651 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3652 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3653 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3654 /* 3655 The next 
two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3656 rather than the slower MatSetValues().
3657 */
3658 M->was_assembled = PETSC_TRUE;
3659 M->assembled = PETSC_FALSE;
3660 }
3661 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3662 aij = (Mat_SeqAIJ*)(Mreuse)->data;
3663 ii = aij->i;
3664 jj = aij->j;
3665 aa = aij->a;
3666 for (i=0; i<m; i++) {
3667 row = rstart + i;
3668 nz = ii[i+1] - ii[i];
3669 cwork = jj; jj += nz;
3670 vwork = aa; aa += nz;
3671 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3672 }
3673
3674 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3675 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3676 *newmat = M;
3677
3678 /* save submatrix used in processor for next request */
3679 if (call == MAT_INITIAL_MATRIX) {
3680 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3681 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3682 }
3683 PetscFunctionReturn(0);
3684 }
3685
3686 #undef __FUNCT__
3687 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3688 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3689 {
3690 PetscInt m,cstart, cend,j,nnz,i,d;
3691 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3692 const PetscInt *JJ;
3693 PetscScalar *values;
3694 PetscErrorCode ierr;
3695
3696 PetscFunctionBegin;
3697 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3698
3699 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3700 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3701 m = B->rmap->n;
3702 cstart = B->cmap->rstart;
3703 cend = B->cmap->rend;
3704 rstart = B->rmap->rstart;
3705
3706 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3707
3708 #if defined(PETSC_USE_DEBUG)
3709 for (i=0; i<m; i++) {
3710 nnz = Ii[i+1] - Ii[i];
3711 JJ = J + Ii[i];
3712 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3713 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3714 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3715 }
3716 #endif
3717
3718 for (i=0; i<m; i++) {
3719 nnz = Ii[i+1] - Ii[i];
3720 JJ = J + Ii[i];
3721 nnz_max = PetscMax(nnz_max,nnz);
3722 d = 0;
3723 for (j=0; j<nnz; j++) {
3724 if (cstart <= JJ[j] && JJ[j] < cend) d++;
3725 }
3726 d_nnz[i] = d;
3727 o_nnz[i] = nnz - d;
3728 }
3729 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3730 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3731
3732 if (v) values = (PetscScalar*)v;
3733 else {
3734 ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr);
3735 }
3736
3737 for (i=0; i<m; i++) {
3738 ii = i + rstart;
3739 nnz = Ii[i+1] - Ii[i];
3740 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3741 }
3742 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3743 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3744
3745 if (!v) {
3746 ierr = PetscFree(values);CHKERRQ(ierr);
3747 }
3748 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3749 PetscFunctionReturn(0);
3750 }
3751
3752 #undef __FUNCT__
3753 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3754 /*@
3755 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3756 (the default parallel PETSc format).
3757
3758 Collective on MPI_Comm
3759
3760 Input Parameters:
3761 + B - the matrix
3762 . i - the indices into j for the start of each local row (starts with zero)
3763 . j - the column indices for each local row (starts with zero)
3764 - v - optional values in the matrix
3765
3766 Level: developer
3767
3768 Notes:
3769 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3770 thus you CANNOT change the matrix entries by changing the values of v[] after you have
3771 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3772
3773 The i and j indices are 0 based, and the i indices are offsets into the local j array.
3774
3775 The format used for the sparse matrix input is equivalent to a
3776 row-major ordering, i.e., for the following matrix, the input data expected is
3777 as shown:
3778
3779 1 0 0
3780 2 0 3 P0
3781 -------
3782 4 5 6 P1
3783
3784 Process0 [P0]: rows_owned=[0,1]
3785 i = {0,1,3} [size = nrow+1 = 2+1]
3786 j = {0,0,2} [size = nz = 3]
3787 v = {1,2,3} [size = nz = 3]
3788
3789 Process1 [P1]: rows_owned=[2]
3790 i = {0,3} [size = nrow+1 = 1+1]
3791 j = {0,1,2} [size = nz = 3]
3792 v = {4,5,6} [size = nz = 3]
3793
3794 .keywords: matrix, aij, compressed row, sparse, parallel
3795
3796 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3797 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3798 @*/
3799 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3800 {
3801 PetscErrorCode ierr;
3802
3803 PetscFunctionBegin;
3804 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3805 PetscFunctionReturn(0);
3806 }
3807
3808 #undef __FUNCT__
3809 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3810 /*@C
3811 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3812 (the default parallel PETSc format). For good matrix assembly performance
3813 the user should preallocate the matrix storage by setting the parameters
3814 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
3815 performance can be increased by more than a factor of 50.
3816
3817 Collective on MPI_Comm
3818
3819 Input Parameters:
3820 + B - the matrix
3821 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
3822 (same value is used for all local rows)
3823 . d_nnz - array containing the number of nonzeros in the various rows of the
3824 DIAGONAL portion of the local submatrix (possibly different for each row)
3825 or NULL, if d_nz is used to specify the nonzero structure.
3826 The size of this array is equal to the number of local rows, i.e., 'm'.
3827 For matrices that will be factored, you must leave room for (and set)
3828 the diagonal entry even if it is zero.
3829 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
3830 submatrix (same value is used for all local rows).
3831 - o_nnz - array containing the number of nonzeros in the various rows of the
3832 OFF-DIAGONAL portion of the local submatrix (possibly different for
3833 each row) or NULL, if o_nz is used to specify the nonzero
3834 structure. The size of this array is equal to the number
3835 of local rows, i.e., 'm'.
3836
3837 If the *_nnz parameter is given then the *_nz parameter is ignored
3838
3839 The AIJ format (also called the Yale sparse matrix format or
3840 compressed row storage (CSR)) is fully compatible with standard Fortran 77
3841 storage. The stored row and column indices begin with zero.
3842 See Users-Manual: ch_mat for details.
3843
3844 The parallel matrix is partitioned such that the first m0 rows belong to
3845 process 0, the next m1 rows belong to process 1, the next m2 rows belong
3846 to process 2, etc., where m0,m1,m2... are the input parameter 'm'.
3847
3848 The DIAGONAL portion of the local submatrix of a processor can be defined
3849 as the submatrix which is obtained by extracting the part corresponding to
3850 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3851 first row that belongs to the processor, r2 is the last row belonging to
3852 this processor, and c1-c2 is the range of indices of the local part of a
3853 vector suitable for applying the matrix to. This is an m x n matrix. In the
3854 common case of a square matrix, the row and column ranges are the same and
3855 the DIAGONAL part is also square. The remaining portion of the local
3856 submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3857
3858 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
3859
3860 You can call MatGetInfo() to get information on how effective the preallocation was;
3861 for example the fields mallocs, nz_allocated, nz_used, nz_unneeded;
3862 You can also run with the option -info and look for messages with the string
3863 malloc in them to see if additional memory allocation was needed.
3864
3865 Example usage:
3866
3867 Consider the following 8x8 matrix with 34 non-zero values that is
3868 assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3869 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3870 as follows:
3871
3872 .vb
3873 1 2 0 | 0 3 0 | 0 4
3874 Proc0 0 5 6 | 7 0 0 | 8 0
3875 9 0 10 | 11 0 0 | 12 0
3876 -------------------------------------
3877 13 0 14 | 15 16 17 | 0 0
3878 Proc1 0 18 0 | 19 20 21 | 0 0
3879 0 0 0 | 22 23 0 | 24 0
3880 -------------------------------------
3881 Proc2 25 26 27 | 0 0 28 | 29 0
3882 30 0 0 | 31 32 33 | 0 34
3883 .ve
3884
3885 This can be represented as a collection of submatrices as:
3886
3887 .vb
3888 A B C
3889 D E F
3890 G H I
3891 .ve
3892
3893 Where the submatrices A,B,C are owned by proc0, D,E,F are
3894 owned by proc1, G,H,I are owned by proc2.
3895
3896 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3897 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3898 The 'M','N' parameters are 8,8, and have the same values on all procs.
3899
3900 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3901 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3902 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
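For reference, a minimal call sequence using this preallocation is sketched below; the
sizes and the d_nnz/o_nnz arrays are placeholders to be computed per process, and error
checking is omitted:
.vb
   Mat A;
   MatCreate(comm,&A);
   MatSetSizes(A,m,n,M,N);
   MatSetType(A,MATMPIAIJ);
   MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
   /* ... MatSetValues() calls, then MatAssemblyBegin()/MatAssemblyEnd() ... */
.ve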
3903 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3904 part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
3905 matrix, and [DF] as another SeqAIJ matrix.
3906
3907 When d_nz, o_nz parameters are specified, d_nz storage elements are
3908 allocated for every row of the local diagonal submatrix, and o_nz
3909 storage locations are allocated for every row of the OFF-DIAGONAL submat.
3910 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3911 local row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3912 In this case, the values of d_nz,o_nz are:
3913 .vb
3914 proc0 : d_nz = 2, o_nz = 2
3915 proc1 : d_nz = 3, o_nz = 2
3916 proc2 : d_nz = 1, o_nz = 4
3917 .ve
3918 We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3919 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3920 for proc2, i.e., we are using 12+15+10=37 storage locations to store
3921 34 values.
3922
3923 When d_nnz, o_nnz parameters are specified, the storage is specified
3924 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3925 In the above case the values for d_nnz,o_nnz are:
3926 .vb
3927 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3928 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3929 proc2: d_nnz = [1,1] and o_nnz = [4,4]
3930 .ve
3931 Here the space allocated is the sum of all the above values, i.e., 34, and
3932 hence preallocation is perfect.
3933
3934 Level: intermediate
3935
3936 .keywords: matrix, aij, compressed row, sparse, parallel
3937
3938 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3939 MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3940 @*/
3941 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3942 {
3943 PetscErrorCode ierr;
3944
3945 PetscFunctionBegin;
3946 PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3947 PetscValidType(B,1);
3948 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3949 PetscFunctionReturn(0);
3950 }
3951
3952 #undef __FUNCT__
3953 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3954 /*@
3955 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local
3956 rows in standard CSR format.
3957
3958 Collective on MPI_Comm
3959
3960 Input Parameters:
3961 + comm - MPI communicator
3962 . m - number of local rows (Cannot be PETSC_DECIDE)
3963 . n - This value should be the same as the local size used in creating the
3964 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3965 it calculated if N is given) For square matrices n is almost always m.
3966 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
3967 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
3968 . i - row indices
3969 . j - column indices
3970 - a - matrix values
3971
3972 Output Parameter:
3973 . mat - the matrix
3974
3975 Level: intermediate
3976
3977 Notes:
3978 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3979 thus you CANNOT change the matrix entries by changing the values of a[] after you have
3980 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3981
3982 The i and j indices are 0 based, and the i indices are offsets into the local j array.
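As a sketch (error checking omitted; m, n, i, j, and a are the per-process CSR quantities
described above), the typical call is:
.vb
   Mat A;
   MatCreateMPIAIJWithArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);
   /* the i, j, and a arrays were copied and may now be freed or reused */
.ve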
3983
3984 The format used for the sparse matrix input is equivalent to a
3985 row-major ordering, i.e., for the following matrix, the input data expected is
3986 as shown:
3987
3988 1 0 0
3989 2 0 3 P0
3990 -------
3991 4 5 6 P1
3992
3993 Process0 [P0]: rows_owned=[0,1]
3994 i = {0,1,3} [size = nrow+1 = 2+1]
3995 j = {0,0,2} [size = nz = 3]
3996 v = {1,2,3} [size = nz = 3]
3997
3998 Process1 [P1]: rows_owned=[2]
3999 i = {0,3} [size = nrow+1 = 1+1]
4000 j = {0,1,2} [size = nz = 3]
4001 v = {4,5,6} [size = nz = 3]
4002
4003 .keywords: matrix, aij, compressed row, sparse, parallel
4004
4005 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4006 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4007 @*/
4008 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4009 {
4010 PetscErrorCode ierr;
4011
4012 PetscFunctionBegin;
4013 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4014 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4015 ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4016 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4017 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4018 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4019 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4020 PetscFunctionReturn(0);
4021 }
4022
4023 #undef __FUNCT__
4024 #define __FUNCT__ "MatCreateAIJ"
4025 /*@C
4026 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4027 (the default parallel PETSc format). For good matrix assembly performance
4028 the user should preallocate the matrix storage by setting the parameters
4029 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
4030 performance can be increased by more than a factor of 50.
4031
4032 Collective on MPI_Comm
4033
4034 Input Parameters:
4035 + comm - MPI communicator
4036 . m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
4037 This value should be the same as the local size used in creating the
4038 y vector for the matrix-vector product y = Ax.
4039 . n - This value should be the same as the local size used in creating the
4040 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4041 it calculated if N is given) For square matrices n is almost always m.
4042 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4043 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4044 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
4045 (same value is used for all local rows)
4046 . d_nnz - array containing the number of nonzeros in the various rows of the
4047 DIAGONAL portion of the local submatrix (possibly different for each row)
4048 or NULL, if d_nz is used to specify the nonzero structure.
4049 The size of this array is equal to the number of local rows, i.e., 'm'.
4050 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
4051 submatrix (same value is used for all local rows).
4052 - o_nnz - array containing the number of nonzeros in the various rows of the
4053 OFF-DIAGONAL portion of the local submatrix (possibly different for
4054 each row) or NULL, if o_nz is used to specify the nonzero
4055 structure.
The size of this array is equal to the number
4056 of local rows, i.e., 'm'.
4057
4058 Output Parameter:
4059 . A - the matrix
4060
4061 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4062 MatXXXXSetPreallocation() paradigm instead of this routine directly.
4063 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4064
4065 Notes:
4066 If the *_nnz parameter is given then the *_nz parameter is ignored
4067
4068 m,n,M,N parameters specify the size of the matrix, and its partitioning across
4069 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4070 storage requirements for this matrix.
4071
4072 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4073 processor then it must be used on all processors that share the object for
4074 that argument.
4075
4076 The user MUST specify either the local or global matrix dimensions
4077 (possibly both).
4078
4079 The parallel matrix is partitioned across processors such that the
4080 first m0 rows belong to process 0, the next m1 rows belong to
4081 process 1, the next m2 rows belong to process 2, etc., where
4082 m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
4083 values corresponding to an [m x N] submatrix.
4084
4085 The columns are logically partitioned with the n0 columns belonging
4086 to the 0th partition, the next n1 columns belonging to the next
4087 partition, etc., where n0,n1,n2... are the input parameter 'n'.
4088
4089 The DIAGONAL portion of the local submatrix on any given processor
4090 is the submatrix corresponding to the rows and columns m,n
4091 corresponding to the given processor, i.e., the diagonal submatrix on
4092 process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1],
4093 etc. The remaining portion of the local submatrix [m x (N-n)]
4094 constitutes the OFF-DIAGONAL portion. The example below better
4095 illustrates this concept.
4096
4097 For a square global matrix we define each processor's diagonal portion
4098 to be its local rows and the corresponding columns (a square submatrix);
4099 each processor's off-diagonal portion encompasses the remainder of the
4100 local matrix (a rectangular submatrix).
4101
4102 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
4103
4104 When calling this routine with a single process communicator, a matrix of
4105 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
4106 type of communicator, use the construction mechanism:
4107 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4108
4109 By default, this format uses inodes (identical nodes) when possible.
4110 We search for consecutive rows with the same nonzero structure, thereby
4111 reusing matrix information to achieve increased efficiency.
4112
4113 Options Database Keys:
4114 + -mat_no_inode - Do not use inodes
4115 . -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4116 - -mat_aij_oneindex - Internally use indexing starting at 1
4117 rather than 0. Note that when calling MatSetValues(),
4118 the user still MUST index entries starting at 0!
4119
4120
4121 Example usage:
4122
4123 Consider the following 8x8 matrix with 34 non-zero values that is
4124 assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4125 proc1 owns 3 rows, proc2 owns 2 rows.
This division can be shown
4126 as follows:
4127
4128 .vb
4129 1 2 0 | 0 3 0 | 0 4
4130 Proc0 0 5 6 | 7 0 0 | 8 0
4131 9 0 10 | 11 0 0 | 12 0
4132 -------------------------------------
4133 13 0 14 | 15 16 17 | 0 0
4134 Proc1 0 18 0 | 19 20 21 | 0 0
4135 0 0 0 | 22 23 0 | 24 0
4136 -------------------------------------
4137 Proc2 25 26 27 | 0 0 28 | 29 0
4138 30 0 0 | 31 32 33 | 0 34
4139 .ve
4140
4141 This can be represented as a collection of submatrices as:
4142
4143 .vb
4144 A B C
4145 D E F
4146 G H I
4147 .ve
4148
4149 Where the submatrices A,B,C are owned by proc0, D,E,F are
4150 owned by proc1, G,H,I are owned by proc2.
4151
4152 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4153 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4154 The 'M','N' parameters are 8,8, and have the same values on all procs.
4155
4156 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4157 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4158 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4159 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4160 part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
4161 matrix, and [DF] as another SeqAIJ matrix.
4162
4163 When d_nz, o_nz parameters are specified, d_nz storage elements are
4164 allocated for every row of the local diagonal submatrix, and o_nz
4165 storage locations are allocated for every row of the OFF-DIAGONAL submat.
4166 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4167 local row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4168 In this case, the values of d_nz,o_nz are:
4169 .vb
4170 proc0 : d_nz = 2, o_nz = 2
4171 proc1 : d_nz = 3, o_nz = 2
4172 proc2 : d_nz = 1, o_nz = 4
4173 .ve
4174 We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4175 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4176 for proc2, i.e., we are using 12+15+10=37 storage locations to store
4177 34 values.
4178
4179 When d_nnz, o_nnz parameters are specified, the storage is specified
4180 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4181 In the above case the values for d_nnz,o_nnz are:
4182 .vb
4183 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4184 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4185 proc2: d_nnz = [1,1] and o_nnz = [4,4]
4186 .ve
4187 Here the space allocated is the sum of all the above values, i.e., 34, and
4188 hence preallocation is perfect.
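Using the example above, proc0 could create its share of the 8x8 matrix with the
d_nnz/o_nnz variant as sketched below (error checking omitted):
.vb
   Mat      A;
   PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
   MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve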
4189 4190 Level: intermediate 4191 4192 .keywords: matrix, aij, compressed row, sparse, parallel 4193 4194 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4195 MPIAIJ, MatCreateMPIAIJWithArrays() 4196 @*/ 4197 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4198 { 4199 PetscErrorCode ierr; 4200 PetscMPIInt size; 4201 4202 PetscFunctionBegin; 4203 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4204 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4205 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4206 if (size > 1) { 4207 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4208 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4209 } else { 4210 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4211 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4212 } 4213 PetscFunctionReturn(0); 4214 } 4215 4216 #undef __FUNCT__ 4217 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4218 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4219 { 4220 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4221 4222 PetscFunctionBegin; 4223 if (Ad) *Ad = a->A; 4224 if (Ao) *Ao = a->B; 4225 if (colmap) *colmap = a->garray; 4226 PetscFunctionReturn(0); 4227 } 4228 4229 #undef __FUNCT__ 4230 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4231 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4232 { 4233 PetscErrorCode ierr; 4234 PetscInt i; 4235 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4236 4237 PetscFunctionBegin; 4238 if (coloring->ctype == IS_COLORING_GLOBAL) { 4239 ISColoringValue *allcolors,*colors; 4240 ISColoring ocoloring; 4241 4242 /* set coloring for diagonal portion */ 4243 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4244 4245 /* set coloring for off-diagonal portion */ 4246 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 4247 ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr); 4248 for (i=0; i<a->B->cmap->n; i++) { 4249 colors[i] = allcolors[a->garray[i]]; 4250 } 4251 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4252 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4253 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4254 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4255 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4256 ISColoringValue *colors; 4257 PetscInt *larray; 4258 ISColoring ocoloring; 4259 4260 /* set coloring for diagonal portion */ 4261 ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr); 4262 for (i=0; i<a->A->cmap->n; i++) { 4263 larray[i] = i + A->cmap->rstart; 4264 } 4265 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 4266 ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr); 4267 for (i=0; i<a->A->cmap->n; i++) { 4268 colors[i] = coloring->colors[larray[i]]; 4269 } 4270 ierr = PetscFree(larray);CHKERRQ(ierr); 4271 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4272 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4273 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4274 4275 /* set coloring for off-diagonal portion */ 4276 ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr); 4277 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 4278 ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr); 4279 for (i=0; i<a->B->cmap->n; i++) { 4280 colors[i] = coloring->colors[larray[i]]; 4281 } 4282 ierr = PetscFree(larray);CHKERRQ(ierr); 4283 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4284 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4285 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4286 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4287 PetscFunctionReturn(0); 4288 } 4289 4290 #undef __FUNCT__ 4291 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4292 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4293 { 4294 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4295 PetscErrorCode ierr; 4296 4297 PetscFunctionBegin; 4298 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4299 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4300 PetscFunctionReturn(0); 4301 } 4302 4303 #undef __FUNCT__ 4304 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4305 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4306 { 4307 PetscErrorCode ierr; 4308 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4309 PetscInt *indx; 4310 4311 PetscFunctionBegin; 4312 /* This routine will ONLY return MPIAIJ type matrix */ 4313 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4314 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4315 if (n == PETSC_DECIDE) { 4316 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4317 } 4318 /* Check sum(n) = N */ 4319 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4320 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4321 4322 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4323 rstart -= m; 4324 4325 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4326 for (i=0; i<m; i++) { 4327 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4328 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4329 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4330 } 4331 4332 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4333 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4334 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4335 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4336 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4337 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4338 PetscFunctionReturn(0); 4339 } 4340 4341 #undef __FUNCT__ 4342 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4343 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4344 { 4345 PetscErrorCode ierr; 4346 PetscInt m,N,i,rstart,nnz,Ii; 4347 PetscInt *indx; 4348 PetscScalar *values; 4349 4350 PetscFunctionBegin; 4351 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4352 ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr); 4353 for (i=0; i<m; i++) { 4354 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4355 Ii = i + rstart; 4356 ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4357 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4358 } 4359 ierr = 
MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
/*@
      MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large parallel PETSc matrix by concatenating the sequential
           matrix supplied by each process

   Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    inmat - the input sequential matrix (one per process)
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    outmat - the parallel matrix generated

    Level: advanced

   Notes: The number of columns of the input matrix MUST be the same on every process.

@*/
PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
  if (size == 1) {
    if (scall == MAT_INITIAL_MATRIX) {
      ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr);
    } else {
      ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    }
  } else {
    if (scall == MAT_INITIAL_MATRIX) {
      ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
    }
    ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFileSplit"
PetscErrorCode MatFileSplit(Mat A,char *outfile)
{
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  PetscInt          m,N,i,rstart,nnz;
  size_t            len;
  const PetscInt    *indx;
  PetscViewer       out;
  char              *name;
  Mat               B;
  const PetscScalar *values;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
  ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
  /* Should this be the type of the diagonal block of A?
*/ 4428 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4429 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4430 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4431 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4432 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4433 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4434 for (i=0; i<m; i++) { 4435 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4436 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4437 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4438 } 4439 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4440 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4441 4442 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4443 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4444 ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr); 4445 sprintf(name,"%s.%d",outfile,rank); 4446 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4447 ierr = PetscFree(name);CHKERRQ(ierr); 4448 ierr = MatView(B,out);CHKERRQ(ierr); 4449 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4450 ierr = MatDestroy(&B);CHKERRQ(ierr); 4451 PetscFunctionReturn(0); 4452 } 4453 4454 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4455 #undef __FUNCT__ 4456 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4457 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4458 { 4459 PetscErrorCode ierr; 4460 Mat_Merge_SeqsToMPI *merge; 4461 PetscContainer container; 4462 4463 PetscFunctionBegin; 4464 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4465 if (container) { 4466 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4467 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4468 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4469 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4470 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4471 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4472 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4473 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4474 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4475 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4476 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4477 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4478 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4479 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4480 ierr = PetscFree(merge);CHKERRQ(ierr); 4481 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4482 } 4483 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4484 PetscFunctionReturn(0); 4485 } 4486 4487 #include <../src/mat/utils/freespace.h> 4488 #include <petscbt.h> 4489 4490 #undef __FUNCT__ 4491 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4492 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4493 { 4494 PetscErrorCode ierr; 4495 MPI_Comm comm; 4496 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4497 PetscMPIInt size,rank,taga,*len_s; 4498 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4499 PetscInt proc,m; 4500 PetscInt **buf_ri,**buf_rj; 4501 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4502 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4503 MPI_Request *s_waits,*r_waits; 4504 MPI_Status *status; 4505 MatScalar *aa=a->a; 4506 MatScalar **abuf_r,*ba_i; 4507 Mat_Merge_SeqsToMPI *merge; 4508 PetscContainer container; 4509 4510 PetscFunctionBegin; 4511 ierr = 
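/* Numeric phase of the merge: send the values of locally held rows that other
     processes own (the communication layout was fixed by the symbolic phase)
     and accumulate the received values into the preallocated parallel matrix. */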
PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4512 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4513 4514 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4515 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4516 4517 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4518 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4519 4520 bi = merge->bi; 4521 bj = merge->bj; 4522 buf_ri = merge->buf_ri; 4523 buf_rj = merge->buf_rj; 4524 4525 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4526 owners = merge->rowmap->range; 4527 len_s = merge->len_s; 4528 4529 /* send and recv matrix values */ 4530 /*-----------------------------*/ 4531 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4532 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4533 4534 ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr); 4535 for (proc=0,k=0; proc<size; proc++) { 4536 if (!len_s[proc]) continue; 4537 i = owners[proc]; 4538 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4539 k++; 4540 } 4541 4542 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4543 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4544 ierr = PetscFree(status);CHKERRQ(ierr); 4545 4546 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4547 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4548 4549 /* insert mat values of mpimat */ 4550 /*----------------------------*/ 4551 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4552 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4553 4554 for (k=0; k<merge->nrecv; k++) { 4555 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4556 nrows = *(buf_ri_k[k]); 4557 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4558 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4559 } 4560 4561 /* set values of ba */ 4562 m = merge->rowmap->n; 4563 for (i=0; i<m; i++) { 4564 arow = owners[rank] + i; 4565 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4566 bnzi = bi[i+1] - bi[i]; 4567 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4568 4569 /* add local non-zero vals of this proc's seqmat into ba */ 4570 anzi = ai[arow+1] - ai[arow]; 4571 aj = a->j + ai[arow]; 4572 aa = a->a + ai[arow]; 4573 nextaj = 0; 4574 for (j=0; nextaj<anzi; j++) { 4575 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4576 ba_i[j] += aa[nextaj++]; 4577 } 4578 } 4579 4580 /* add received vals into ba */ 4581 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4582 /* i-th row */ 4583 if (i == *nextrow[k]) { 4584 anzi = *(nextai[k]+1) - *nextai[k]; 4585 aj = buf_rj[k] + *(nextai[k]); 4586 aa = abuf_r[k] + *(nextai[k]); 4587 nextaj = 0; 4588 for (j=0; nextaj<anzi; j++) { 4589 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4590 ba_i[j] += aa[nextaj++]; 4591 } 4592 } 4593 nextrow[k]++; nextai[k]++; 4594 } 4595 } 4596 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4597 } 4598 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4599 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4600 4601 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4602 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4603 ierr = 
PetscFree(ba_i);CHKERRQ(ierr); 4604 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4605 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4606 PetscFunctionReturn(0); 4607 } 4608 4609 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4610 4611 #undef __FUNCT__ 4612 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4613 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4614 { 4615 PetscErrorCode ierr; 4616 Mat B_mpi; 4617 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4618 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4619 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4620 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4621 PetscInt len,proc,*dnz,*onz,bs,cbs; 4622 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4623 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4624 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4625 MPI_Status *status; 4626 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4627 PetscBT lnkbt; 4628 Mat_Merge_SeqsToMPI *merge; 4629 PetscContainer container; 4630 4631 PetscFunctionBegin; 4632 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4633 4634 /* make sure it is a PETSc comm */ 4635 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4636 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4637 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4638 4639 ierr = PetscNew(&merge);CHKERRQ(ierr); 4640 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4641 4642 /* determine row ownership */ 4643 /*---------------------------------------------------------*/ 4644 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4645 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4646 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4647 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4648 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4649 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4650 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4651 4652 m = merge->rowmap->n; 4653 owners = merge->rowmap->range; 4654 4655 /* determine the number of messages to send, their lengths */ 4656 /*---------------------------------------------------------*/ 4657 len_s = merge->len_s; 4658 4659 len = 0; /* length of buf_si[] */ 4660 merge->nsend = 0; 4661 for (proc=0; proc<size; proc++) { 4662 len_si[proc] = 0; 4663 if (proc == rank) { 4664 len_s[proc] = 0; 4665 } else { 4666 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4667 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4668 } 4669 if (len_s[proc]) { 4670 merge->nsend++; 4671 nrows = 0; 4672 for (i=owners[proc]; i<owners[proc+1]; i++) { 4673 if (ai[i+1] > ai[i]) nrows++; 4674 } 4675 len_si[proc] = 2*(nrows+1); 4676 len += len_si[proc]; 4677 } 4678 } 4679 4680 /* determine the number and length of messages to receive for ij-structure */ 4681 /*-------------------------------------------------------------------------*/ 4682 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4683 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4684 4685 /* post the Irecv of j-structure */ 4686 /*-------------------------------*/ 4687 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4688 ierr = 
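/* Post nonblocking receives for the column-index (j) payloads; message counts
     and lengths were obtained above via PetscGatherNumberOfMessages() and
     PetscGatherMessageLengths2(). */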
PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4689 4690 /* post the Isend of j-structure */ 4691 /*--------------------------------*/ 4692 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4693 4694 for (proc=0, k=0; proc<size; proc++) { 4695 if (!len_s[proc]) continue; 4696 i = owners[proc]; 4697 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4698 k++; 4699 } 4700 4701 /* receives and sends of j-structure are complete */ 4702 /*------------------------------------------------*/ 4703 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4704 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4705 4706 /* send and recv i-structure */ 4707 /*---------------------------*/ 4708 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4709 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4710 4711 ierr = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr); 4712 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4713 for (proc=0,k=0; proc<size; proc++) { 4714 if (!len_s[proc]) continue; 4715 /* form outgoing message for i-structure: 4716 buf_si[0]: nrows to be sent 4717 [1:nrows]: row index (global) 4718 [nrows+1:2*nrows+1]: i-structure index 4719 */ 4720 /*-------------------------------------------*/ 4721 nrows = len_si[proc]/2 - 1; 4722 buf_si_i = buf_si + nrows+1; 4723 buf_si[0] = nrows; 4724 buf_si_i[0] = 0; 4725 nrows = 0; 4726 for (i=owners[proc]; i<owners[proc+1]; i++) { 4727 anzi = ai[i+1] - ai[i]; 4728 if (anzi) { 4729 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4730 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4731 nrows++; 4732 } 4733 } 4734 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4735 k++; 4736 buf_si += len_si[proc]; 4737 } 4738 4739 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4740 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4741 4742 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4743 for (i=0; i<merge->nrecv; i++) { 4744 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4745 } 4746 4747 ierr = PetscFree(len_si);CHKERRQ(ierr); 4748 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4749 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4750 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4751 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4752 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4753 ierr = PetscFree(status);CHKERRQ(ierr); 4754 4755 /* compute a local seq matrix in each processor */ 4756 /*----------------------------------------------*/ 4757 /* allocate bi array and free space for accumulating nonzero column info */ 4758 ierr = PetscMalloc1((m+1),&bi);CHKERRQ(ierr); 4759 bi[0] = 0; 4760 4761 /* create and initialize a linked list */ 4762 nlnk = N+1; 4763 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4764 4765 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4766 len = ai[owners[rank+1]] - ai[owners[rank]]; 4767 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4768 4769 current_space = free_space; 4770 4771 /* determine symbolic info for each local row */ 4772 ierr = 
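/* One walking pointer per received message: buf_ri_k[k] marks the start of the
     k-th i-structure, nextrow[k] the next row number within it, and nextai[k]
     the next row-offset entry. */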
PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4773 4774 for (k=0; k<merge->nrecv; k++) { 4775 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4776 nrows = *buf_ri_k[k]; 4777 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4778 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4779 } 4780 4781 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4782 len = 0; 4783 for (i=0; i<m; i++) { 4784 bnzi = 0; 4785 /* add local non-zero cols of this proc's seqmat into lnk */ 4786 arow = owners[rank] + i; 4787 anzi = ai[arow+1] - ai[arow]; 4788 aj = a->j + ai[arow]; 4789 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4790 bnzi += nlnk; 4791 /* add received col data into lnk */ 4792 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4793 if (i == *nextrow[k]) { /* i-th row */ 4794 anzi = *(nextai[k]+1) - *nextai[k]; 4795 aj = buf_rj[k] + *nextai[k]; 4796 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4797 bnzi += nlnk; 4798 nextrow[k]++; nextai[k]++; 4799 } 4800 } 4801 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4802 4803 /* if free space is not available, make more free space */ 4804 if (current_space->local_remaining<bnzi) { 4805 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4806 nspacedouble++; 4807 } 4808 /* copy data into free space, then initialize lnk */ 4809 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4810 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4811 4812 current_space->array += bnzi; 4813 current_space->local_used += bnzi; 4814 current_space->local_remaining -= bnzi; 4815 4816 bi[i+1] = bi[i] + bnzi; 4817 } 4818 4819 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4820 4821 ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr); 4822 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4823 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4824 4825 /* create symbolic parallel matrix B_mpi */ 4826 /*---------------------------------------*/ 4827 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4828 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4829 if (n==PETSC_DECIDE) { 4830 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4831 } else { 4832 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4833 } 4834 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4835 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4836 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4837 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4838 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4839 4840 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4841 B_mpi->assembled = PETSC_FALSE; 4842 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4843 merge->bi = bi; 4844 merge->bj = bj; 4845 merge->buf_ri = buf_ri; 4846 merge->buf_rj = buf_rj; 4847 merge->coi = NULL; 4848 merge->coj = NULL; 4849 merge->owners_co = NULL; 4850 4851 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4852 4853 /* attach the supporting struct to B_mpi for reuse */ 4854 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4855 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4856 ierr = 
PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
  *mpimat = B_mpi;

  ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
/*@C
      MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding the sequential
   matrices supplied by each process

   Collective on MPI_Comm

   Input Parameters:
+  comm - the communicator the parallel matrix will live on
.  seqmat - the input sequential matrix (one per process)
.  m - number of local rows (or PETSC_DECIDE)
.  n - number of local columns (or PETSC_DECIDE)
-  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.  mpimat - the parallel matrix generated

   Level: advanced

   Notes:
     The dimensions of the sequential matrix MUST be the same on every process.
     The input seqmat is stored in the container "Mat_Merge_SeqsToMPI", and will be
     destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
@*/
PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    if (scall == MAT_INITIAL_MATRIX) {
      ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
    } else {
      ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    }
    ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }
  ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetLocalMat"
/*@
     MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
          mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
          with MatGetSize()

    Not Collective

   Input Parameters:
+    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.
A_loc - the local sequential matrix generated 4930 4931 Level: developer 4932 4933 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4934 4935 @*/ 4936 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4937 { 4938 PetscErrorCode ierr; 4939 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4940 Mat_SeqAIJ *mat,*a,*b; 4941 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4942 MatScalar *aa,*ba,*cam; 4943 PetscScalar *ca; 4944 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4945 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4946 PetscBool match; 4947 MPI_Comm comm; 4948 PetscMPIInt size; 4949 4950 PetscFunctionBegin; 4951 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4952 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4953 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4954 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4955 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 4956 4957 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4958 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4959 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4960 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4961 aa = a->a; ba = b->a; 4962 if (scall == MAT_INITIAL_MATRIX) { 4963 if (size == 1) { 4964 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 4965 PetscFunctionReturn(0); 4966 } 4967 4968 ierr = PetscMalloc1((1+am),&ci);CHKERRQ(ierr); 4969 ci[0] = 0; 4970 for (i=0; i<am; i++) { 4971 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4972 } 4973 ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr); 4974 ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr); 4975 k = 0; 4976 for (i=0; i<am; i++) { 4977 ncols_o = bi[i+1] - bi[i]; 4978 ncols_d = ai[i+1] - ai[i]; 4979 /* off-diagonal portion of A */ 4980 for (jo=0; jo<ncols_o; jo++) { 4981 col = cmap[*bj]; 4982 if (col >= cstart) break; 4983 cj[k] = col; bj++; 4984 ca[k++] = *ba++; 4985 } 4986 /* diagonal portion of A */ 4987 for (j=0; j<ncols_d; j++) { 4988 cj[k] = cstart + *aj++; 4989 ca[k++] = *aa++; 4990 } 4991 /* off-diagonal portion of A */ 4992 for (j=jo; j<ncols_o; j++) { 4993 cj[k] = cmap[*bj++]; 4994 ca[k++] = *ba++; 4995 } 4996 } 4997 /* put together the new matrix */ 4998 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4999 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5000 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5001 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5002 mat->free_a = PETSC_TRUE; 5003 mat->free_ij = PETSC_TRUE; 5004 mat->nonew = 0; 5005 } else if (scall == MAT_REUSE_MATRIX) { 5006 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5007 ci = mat->i; cj = mat->j; cam = mat->a; 5008 for (i=0; i<am; i++) { 5009 /* off-diagonal portion of A */ 5010 ncols_o = bi[i+1] - bi[i]; 5011 for (jo=0; jo<ncols_o; jo++) { 5012 col = cmap[*bj]; 5013 if (col >= cstart) break; 5014 *cam++ = *ba++; bj++; 5015 } 5016 /* diagonal portion of A */ 5017 ncols_d = ai[i+1] - ai[i]; 5018 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5019 /* off-diagonal portion of A */ 5020 for (j=jo; j<ncols_o; j++) { 5021 *cam++ = *ba++; bj++; 5022 } 5023 } 5024 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5025 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5026 PetscFunctionReturn(0); 5027 } 5028 5029 #undef __FUNCT__ 5030 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5031 /*@C 5032 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5033 5034 Not Collective 5035 5036 Input Parameters: 5037 + A - the matrix 5038 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5039 - row, col - index sets of rows and columns to extract (or NULL) 5040 5041 Output Parameter: 5042 . A_loc - the local sequential matrix generated 5043 5044 Level: developer 5045 5046 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5047 5048 @*/ 5049 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5050 { 5051 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5052 PetscErrorCode ierr; 5053 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5054 IS isrowa,iscola; 5055 Mat *aloc; 5056 PetscBool match; 5057 5058 PetscFunctionBegin; 5059 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5060 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5061 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5062 if (!row) { 5063 start = A->rmap->rstart; end = A->rmap->rend; 5064 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5065 } else { 5066 isrowa = *row; 5067 } 5068 if (!col) { 5069 start = A->cmap->rstart; 5070 cmap = a->garray; 5071 nzA = a->A->cmap->n; 5072 nzB = a->B->cmap->n; 5073 ierr = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr); 5074 ncols = 0; 5075 for (i=0; i<nzB; i++) { 5076 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5077 else break; 5078 } 5079 imark = i; 5080 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5081 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5082 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5083 } else { 5084 iscola = *col; 5085 } 5086 if (scall != MAT_INITIAL_MATRIX) { 5087 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5088 aloc[0] = *A_loc; 5089 } 5090 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5091 *A_loc = aloc[0]; 5092 ierr = PetscFree(aloc);CHKERRQ(ierr); 5093 if (!row) { 5094 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5095 } 5096 if (!col) { 5097 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5098 } 5099 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5100 PetscFunctionReturn(0); 5101 } 5102 5103 #undef __FUNCT__ 5104 #define __FUNCT__ "MatGetBrowsOfAcols" 5105 /*@C 5106 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to 
nonzero columns of local A 5107 5108 Collective on Mat 5109 5110 Input Parameters: 5111 + A,B - the matrices in mpiaij format 5112 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5113 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5114 5115 Output Parameter: 5116 + rowb, colb - index sets of rows and columns of B to extract 5117 - B_seq - the sequential matrix generated 5118 5119 Level: developer 5120 5121 @*/ 5122 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5123 { 5124 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5125 PetscErrorCode ierr; 5126 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5127 IS isrowb,iscolb; 5128 Mat *bseq=NULL; 5129 5130 PetscFunctionBegin; 5131 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5132 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5133 } 5134 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5135 5136 if (scall == MAT_INITIAL_MATRIX) { 5137 start = A->cmap->rstart; 5138 cmap = a->garray; 5139 nzA = a->A->cmap->n; 5140 nzB = a->B->cmap->n; 5141 ierr = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr); 5142 ncols = 0; 5143 for (i=0; i<nzB; i++) { /* row < local row index */ 5144 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5145 else break; 5146 } 5147 imark = i; 5148 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5149 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5150 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5151 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5152 } else { 5153 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5154 isrowb = *rowb; iscolb = *colb; 5155 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5156 bseq[0] = *B_seq; 5157 } 5158 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5159 *B_seq = bseq[0]; 5160 ierr = PetscFree(bseq);CHKERRQ(ierr); 5161 if (!rowb) { 5162 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5163 } else { 5164 *rowb = isrowb; 5165 } 5166 if (!colb) { 5167 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5168 } else { 5169 *colb = iscolb; 5170 } 5171 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5172 PetscFunctionReturn(0); 5173 } 5174 5175 #undef __FUNCT__ 5176 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5177 /* 5178 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5179 of the OFF-DIAGONAL portion of local A 5180 5181 Collective on Mat 5182 5183 Input Parameters: 5184 + A,B - the matrices in mpiaij format 5185 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5186 5187 Output Parameter: 5188 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5189 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5190 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5191 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5192 5193 Level: developer 5194 5195 */ 5196 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5197 { 5198 VecScatter_MPI_General *gen_to,*gen_from; 5199 PetscErrorCode ierr; 5200 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5201 Mat_SeqAIJ *b_oth; 5202 VecScatter ctx =a->Mvctx; 5203 MPI_Comm comm; 5204 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5205 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5206 PetscScalar *rvalues,*svalues; 5207 MatScalar *b_otha,*bufa,*bufA; 5208 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5209 MPI_Request *rwaits = NULL,*swaits = NULL; 5210 MPI_Status *sstatus,rstatus; 5211 PetscMPIInt jj,size; 5212 PetscInt *cols,sbs,rbs; 5213 PetscScalar *vals; 5214 5215 PetscFunctionBegin; 5216 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5217 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5218 if (size == 1) PetscFunctionReturn(0); 5219 5220 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5221 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5222 } 5223 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5224 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5225 5226 gen_to = (VecScatter_MPI_General*)ctx->todata; 5227 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5228 rvalues = gen_from->values; /* holds the length of receiving row */ 5229 svalues = gen_to->values; /* holds the length of sending row */ 5230 nrecvs = gen_from->n; 5231 nsends = gen_to->n; 5232 5233 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5234 srow = gen_to->indices; /* local row index to be sent */ 5235 sstarts = gen_to->starts; 5236 sprocs = gen_to->procs; 5237 sstatus = gen_to->sstatus; 5238 sbs = gen_to->bs; 5239 rstarts = gen_from->starts; 5240 rprocs = gen_from->procs; 5241 rbs = gen_from->bs; 5242 5243 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5244 if (scall == MAT_INITIAL_MATRIX) { 5245 /* i-array */ 5246 /*---------*/ 5247 /* post receives */ 5248 for (i=0; i<nrecvs; i++) { 5249 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5250 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5251 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5252 } 5253 5254 /* pack the outgoing message */ 5255 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5256 5257 sstartsj[0] = 0; 5258 rstartsj[0] = 0; 5259 len = 0; /* total length of j or a array to be sent */ 5260 k = 0; 5261 for (i=0; i<nsends; i++) { 5262 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5263 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5264 for (j=0; j<nrows; j++) { 5265 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5266 for (l=0; l<sbs; l++) { 5267 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5268 5269 rowlen[j*sbs+l] = ncols; 5270 5271 len += ncols; 5272 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5273 } 5274 k++; 5275 } 5276 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5277 5278 
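/* Record the running prefix sum: the j- and a-array payload of message i+1
         will start at offset len in bufj/bufa. */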
sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}

    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth */
    ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr);

    b_othi[0] = 0;
    len       = 0; /* total length of j or a array to be received */
    k         = 0;
    for (i=0; i<nrecvs; i++) {
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        len += rowlen[j]; k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }

    /* allocate space for j and a arrays of B_oth */
    ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /* post receives of j-array */
    for (i=0; i<nrecvs; i++) {
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message j-array */
    k = 0;
    for (i=0; i<nsends; i++) {
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap->range[rank]; /* global row idx */
        for (ll=0; ll<sbs; ll++) {
          ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
          for (l=0; l<ncols; l++) {
            *bufJ++ = cols[l];
          }
          ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
        }
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX) {
    sstartsj = *startsj_s;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");

  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++) {
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }

  /* pack the outgoing message a-array */
  k = 0;
  for (i=0; i<nsends; i++) {
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap->range[rank]; /* global row idx */
      for (ll=0; ll<sbs; ll++) {
        ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
        for (l=0; l<ncols; l++) {
          *bufA++ = vals[l];
        }
        ierr =
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5372 } 5373 } 5374 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5375 } 5376 /* recvs and sends of a-array are completed */ 5377 i = nrecvs; 5378 while (i--) { 5379 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5380 } 5381 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5382 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5383 5384 if (scall == MAT_INITIAL_MATRIX) { 5385 /* put together the new matrix */ 5386 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5387 5388 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5389 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5390 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5391 b_oth->free_a = PETSC_TRUE; 5392 b_oth->free_ij = PETSC_TRUE; 5393 b_oth->nonew = 0; 5394 5395 ierr = PetscFree(bufj);CHKERRQ(ierr); 5396 if (!startsj_s || !bufa_ptr) { 5397 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5398 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5399 } else { 5400 *startsj_s = sstartsj; 5401 *startsj_r = rstartsj; 5402 *bufa_ptr = bufa; 5403 } 5404 } 5405 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5406 PetscFunctionReturn(0); 5407 } 5408 5409 #undef __FUNCT__ 5410 #define __FUNCT__ "MatGetCommunicationStructs" 5411 /*@C 5412 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5413 5414 Not Collective 5415 5416 Input Parameters: 5417 . A - The matrix in mpiaij format 5418 5419 Output Parameter: 5420 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5421 . 
colmap - A map from global column index to local index into lvec 5422 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5423 5424 Level: developer 5425 5426 @*/ 5427 #if defined(PETSC_USE_CTABLE) 5428 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5429 #else 5430 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5431 #endif 5432 { 5433 Mat_MPIAIJ *a; 5434 5435 PetscFunctionBegin; 5436 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5437 PetscValidPointer(lvec, 2); 5438 PetscValidPointer(colmap, 3); 5439 PetscValidPointer(multScatter, 4); 5440 a = (Mat_MPIAIJ*) A->data; 5441 if (lvec) *lvec = a->lvec; 5442 if (colmap) *colmap = a->colmap; 5443 if (multScatter) *multScatter = a->Mvctx; 5444 PetscFunctionReturn(0); 5445 } 5446 5447 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5448 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5449 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5450 5451 #undef __FUNCT__ 5452 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5453 /* 5454 Computes (B'*A')' since computing B*A directly is untenable 5455 5456 n p p 5457 ( ) ( ) ( ) 5458 m ( A ) * n ( B ) = m ( C ) 5459 ( ) ( ) ( ) 5460 5461 */ 5462 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5463 { 5464 PetscErrorCode ierr; 5465 Mat At,Bt,Ct; 5466 5467 PetscFunctionBegin; 5468 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5469 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5470 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5471 ierr = MatDestroy(&At);CHKERRQ(ierr); 5472 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5473 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5474 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5475 PetscFunctionReturn(0); 5476 } 5477 5478 #undef __FUNCT__ 5479 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5480 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5481 { 5482 PetscErrorCode ierr; 5483 PetscInt m=A->rmap->n,n=B->cmap->n; 5484 Mat Cmat; 5485 5486 PetscFunctionBegin; 5487 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5488 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5489 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5490 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5491 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5492 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5493 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5494 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5495 5496 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5497 5498 *C = Cmat; 5499 PetscFunctionReturn(0); 5500 } 5501 5502 /* ----------------------------------------------------------------*/ 5503 #undef __FUNCT__ 5504 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5505 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5506 { 5507 PetscErrorCode ierr; 5508 5509 PetscFunctionBegin; 5510 if (scall == MAT_INITIAL_MATRIX) { 5511 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5512 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5513 ierr = 
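/* The symbolic phase just created the dense product matrix C; with
       MAT_REUSE_MATRIX only the numeric phase below needs to be repeated. */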
PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5514 } 5515 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5516 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5517 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5518 PetscFunctionReturn(0); 5519 } 5520 5521 #if defined(PETSC_HAVE_MUMPS) 5522 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5523 #endif 5524 #if defined(PETSC_HAVE_PASTIX) 5525 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5526 #endif 5527 #if defined(PETSC_HAVE_SUPERLU_DIST) 5528 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5529 #endif 5530 #if defined(PETSC_HAVE_CLIQUE) 5531 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5532 #endif 5533 5534 /*MC 5535 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5536 5537 Options Database Keys: 5538 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5539 5540 Level: beginner 5541 5542 .seealso: MatCreateAIJ() 5543 M*/ 5544 5545 #undef __FUNCT__ 5546 #define __FUNCT__ "MatCreate_MPIAIJ" 5547 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5548 { 5549 Mat_MPIAIJ *b; 5550 PetscErrorCode ierr; 5551 PetscMPIInt size; 5552 5553 PetscFunctionBegin; 5554 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5555 5556 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5557 B->data = (void*)b; 5558 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5559 B->assembled = PETSC_FALSE; 5560 B->insertmode = NOT_SET_VALUES; 5561 b->size = size; 5562 5563 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5564 5565 /* build cache for off array entries formed */ 5566 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5567 5568 b->donotstash = PETSC_FALSE; 5569 b->colmap = 0; 5570 b->garray = 0; 5571 b->roworiented = PETSC_TRUE; 5572 5573 /* stuff used for matrix vector multiply */ 5574 b->lvec = NULL; 5575 b->Mvctx = NULL; 5576 5577 /* stuff for MatGetRow() */ 5578 b->rowindices = 0; 5579 b->rowvalues = 0; 5580 b->getrowactive = PETSC_FALSE; 5581 5582 /* flexible pointer used in CUSP/CUSPARSE classes */ 5583 b->spptr = NULL; 5584 5585 #if defined(PETSC_HAVE_MUMPS) 5586 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr); 5587 #endif 5588 #if defined(PETSC_HAVE_PASTIX) 5589 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5590 #endif 5591 #if defined(PETSC_HAVE_SUPERLU_DIST) 5592 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5593 #endif 5594 #if defined(PETSC_HAVE_CLIQUE) 5595 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr); 5596 #endif 5597 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5598 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5599 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5600 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5601 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5602 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5603 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5604 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5605 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5606 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5607 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5608 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5609 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5610 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5611 PetscFunctionReturn(0); 5612 } 5613 5614 #undef __FUNCT__ 5615 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5616 /*@C 5617 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5618 and "off-diagonal" part of the matrix in CSR format. 5619 5620 Collective on MPI_Comm 5621 5622 Input Parameters: 5623 + comm - MPI communicator 5624 . m - number of local rows (Cannot be PETSC_DECIDE) 5625 . n - This value should be the same as the local size used in creating the 5626 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5627 calculated if N is given) For square matrices n is almost always m. 5628 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5629 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5630 . i - row indices for "diagonal" portion of matrix 5631 . j - column indices 5632 . a - matrix values 5633 . oi - row indices for "off-diagonal" portion of matrix 5634 . oj - column indices 5635 - oa - matrix values 5636 5637 Output Parameter: 5638 . mat - the matrix 5639 5640 Level: advanced 5641 5642 Notes: 5643 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5644 must free the arrays once the matrix has been destroyed and not before. 5645 5646 The i and j indices are 0 based 5647 5648 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5649 5650 This sets local rows and cannot be used to set off-processor values. 5651 5652 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5653 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5654 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5655 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5656 keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5657 communication if it is known that only local entries will be set. 
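
   Example usage (a sketch for illustration only, not taken from a real application;
   the tiny CSR arrays and the 2-process layout are invented). On rank 0 of a
   2-process run with 2 local rows and 2 local columns per process, one diagonal
   entry and one off-diagonal entry per row might be supplied as
.vb
      PetscInt    i[3]  = {0,1,2},  j[2]  = {0,1};
      PetscScalar a[2]  = {1.0,2.0};
      PetscInt    oi[3] = {0,1,2},  oj[2] = {2,3};
      PetscScalar oa[2] = {-1.0,-1.0};
      Mat         A;

      ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,PETSC_DETERMINE,PETSC_DETERMINE,
                                            i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve
   where, as the calls to MatCreateSeqAIJWithArrays() below suggest, j[] holds column
   indices local to the diagonal block while oj[] holds global column indices
   (rank 1 would pass oj[] = {0,1}).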

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
    MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Special version for direct calls from Fortran
*/
#include <petsc-private/fortranimpl.h>

#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif

/* Change these macros so they can be used in a void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ3
#define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)

#undef __FUNCT__
#define __FUNCT__ "matsetvaluesmpiaij_"
PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;

#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
#endif
  {
    PetscInt  i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
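    /* Cached ownership bounds: [rstart,rend) are this process's rows and
       [cstart,cend) its columns; they determine whether an entry belongs in the
       diagonal block (aij->A) or the off-diagonal block (aij->B). */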
PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5738 PetscBool roworiented = aij->roworiented; 5739 5740 /* Some Variables required in the macro */ 5741 Mat A = aij->A; 5742 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5743 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5744 MatScalar *aa = a->a; 5745 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5746 Mat B = aij->B; 5747 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5748 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5749 MatScalar *ba = b->a; 5750 5751 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5752 PetscInt nonew = a->nonew; 5753 MatScalar *ap1,*ap2; 5754 5755 PetscFunctionBegin; 5756 for (i=0; i<m; i++) { 5757 if (im[i] < 0) continue; 5758 #if defined(PETSC_USE_DEBUG) 5759 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5760 #endif 5761 if (im[i] >= rstart && im[i] < rend) { 5762 row = im[i] - rstart; 5763 lastcol1 = -1; 5764 rp1 = aj + ai[row]; 5765 ap1 = aa + ai[row]; 5766 rmax1 = aimax[row]; 5767 nrow1 = ailen[row]; 5768 low1 = 0; 5769 high1 = nrow1; 5770 lastcol2 = -1; 5771 rp2 = bj + bi[row]; 5772 ap2 = ba + bi[row]; 5773 rmax2 = bimax[row]; 5774 nrow2 = bilen[row]; 5775 low2 = 0; 5776 high2 = nrow2; 5777 5778 for (j=0; j<n; j++) { 5779 if (roworiented) value = v[i*n+j]; 5780 else value = v[i+j*m]; 5781 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5782 if (in[j] >= cstart && in[j] < cend) { 5783 col = in[j] - cstart; 5784 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5785 } else if (in[j] < 0) continue; 5786 #if defined(PETSC_USE_DEBUG) 5787 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5788 #endif 5789 else { 5790 if (mat->was_assembled) { 5791 if (!aij->colmap) { 5792 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5793 } 5794 #if defined(PETSC_USE_CTABLE) 5795 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5796 col--; 5797 #else 5798 col = aij->colmap[in[j]] - 1; 5799 #endif 5800 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5801 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5802 col = in[j]; 5803 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5804 B = aij->B; 5805 b = (Mat_SeqAIJ*)B->data; 5806 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5807 rp2 = bj + bi[row]; 5808 ap2 = ba + bi[row]; 5809 rmax2 = bimax[row]; 5810 nrow2 = bilen[row]; 5811 low2 = 0; 5812 high2 = nrow2; 5813 bm = aij->B->rmap->n; 5814 ba = b->a; 5815 } 5816 } else col = in[j]; 5817 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5818 } 5819 } 5820 } else if (!aij->donotstash) { 5821 if (roworiented) { 5822 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5823 } else { 5824 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5825 } 5826 } 5827 } 5828 } 5829 PetscFunctionReturnVoid(); 5830 } 5831 5832
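
/*
   Example (a hedged sketch appended for illustration; it is not part of the
   original source and uses only the public API discussed above). The usual way
   to obtain the parallel matrix type implemented in this file is MatCreateAIJ()
   followed by MatSetValues() and the two-phase assembly; the sizes, stencil,
   and preallocation numbers (3 diagonal and 2 off-diagonal nonzeros per row)
   below are invented:

      Mat            A;
      PetscInt       row,rstart,rend,ncols,cols[3];
      PetscScalar    vals[3];
      PetscErrorCode ierr;

      ierr = MatCreateAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,
                          3,NULL,2,NULL,&A);CHKERRQ(ierr);
      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      for (row=rstart; row<rend; row++) {
        ncols = 0;
        if (row > 0)  {cols[ncols] = row-1; vals[ncols++] = -1.0;}
        cols[ncols] = row; vals[ncols++] = 2.0;
        if (row < 99) {cols[ncols] = row+1; vals[ncols++] = -1.0;}
        ierr = MatSetValues(A,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      }
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/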