#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc-private/vecimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
.  -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also
   automatically switches over to use inodes when enough exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
.  -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
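/*
   Illustrative usage sketch (not part of the library): the MATAIJ manual page above recommends calling
   both preallocation routines so the same code runs on one process or many. Assuming the per-row counts
   d_nnz[] (diagonal block) and o_nnz[] (off-diagonal block) have already been computed by the caller,
   a typical setup is

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,0,d_nnz);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);

   The preallocation call that does not match the actual type of A is ignored, so both may be made
   unconditionally.
*/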
#undef __FUNCT__
#define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }

  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
178 179 Only for square matrices 180 181 Used by a preconditioner, hence PETSC_EXTERN 182 */ 183 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 184 { 185 PetscMPIInt rank,size; 186 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 187 PetscErrorCode ierr; 188 Mat mat; 189 Mat_SeqAIJ *gmata; 190 PetscMPIInt tag; 191 MPI_Status status; 192 PetscBool aij; 193 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 194 195 PetscFunctionBegin; 196 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 197 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 198 if (!rank) { 199 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 200 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 201 } 202 if (reuse == MAT_INITIAL_MATRIX) { 203 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 204 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 205 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 206 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 207 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 208 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 209 ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr); 210 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 211 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 212 213 rowners[0] = 0; 214 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 215 rstart = rowners[rank]; 216 rend = rowners[rank+1]; 217 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 218 if (!rank) { 219 gmata = (Mat_SeqAIJ*) gmat->data; 220 /* send row lengths to all processors */ 221 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 222 for (i=1; i<size; i++) { 223 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 224 } 225 /* determine number diagonal and off-diagonal counts */ 226 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 227 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 228 jj = 0; 229 for (i=0; i<m; i++) { 230 for (j=0; j<dlens[i]; j++) { 231 if (gmata->j[jj] < rstart) ld[i]++; 232 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 233 jj++; 234 } 235 } 236 /* send column indices to other processes */ 237 for (i=1; i<size; i++) { 238 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 239 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 240 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 241 } 242 243 /* send numerical values to other processes */ 244 for (i=1; i<size; i++) { 245 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 246 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 247 } 248 gmataa = gmata->a; 249 gmataj = gmata->j; 250 251 } else { 252 /* receive row lengths */ 253 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 254 /* receive column indices */ 255 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 256 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 257 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 258 /* determine number diagonal and off-diagonal counts */ 259 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 260 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 261 jj = 0; 262 for (i=0; i<m; i++) { 263 for (j=0; j<dlens[i]; j++) { 264 if 
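/*
   Worked example (illustrative only) of the diagonal/off-diagonal classification performed in the loops
   above and below. With two processes and four rows per process, process 1 owns rows and columns [4,8),
   so rstart = 4 and rend = 8. For a local row whose global column indices are {1, 4, 6, 9}:

     column 1 : 1 < rstart           -> off-diagonal (olens), also counted in ld[] (left of the diagonal block)
     column 4 : rstart <= 4 < rend   -> diagonal block
     column 6 : rstart <= 6 < rend   -> diagonal block
     column 9 : 9 >= rend            -> off-diagonal (olens)

   so dlens[i] starts as the full row length 4 and olens[i] = 2; the later "dlens[i] -= olens[i]" leaves
   the pure diagonal-block count of 2 that is used for preallocation.
*/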
(gmataj[jj] < rstart) ld[i]++; 265 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 266 jj++; 267 } 268 } 269 /* receive numerical values */ 270 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 271 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 272 } 273 /* set preallocation */ 274 for (i=0; i<m; i++) { 275 dlens[i] -= olens[i]; 276 } 277 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 278 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 279 280 for (i=0; i<m; i++) { 281 dlens[i] += olens[i]; 282 } 283 cnt = 0; 284 for (i=0; i<m; i++) { 285 row = rstart + i; 286 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 287 cnt += dlens[i]; 288 } 289 if (rank) { 290 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 291 } 292 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 293 ierr = PetscFree(rowners);CHKERRQ(ierr); 294 295 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 296 297 *inmat = mat; 298 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 299 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 300 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 301 mat = *inmat; 302 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 303 if (!rank) { 304 /* send numerical values to other processes */ 305 gmata = (Mat_SeqAIJ*) gmat->data; 306 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 307 gmataa = gmata->a; 308 for (i=1; i<size; i++) { 309 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 310 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 311 } 312 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 313 } else { 314 /* receive numerical values from process 0*/ 315 nz = Ad->nz + Ao->nz; 316 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 317 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 318 } 319 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 320 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 321 ad = Ad->a; 322 ao = Ao->a; 323 if (mat->rmap->n) { 324 i = 0; 325 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 326 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 327 } 328 for (i=1; i<mat->rmap->n; i++) { 329 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 330 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 331 } 332 i--; 333 if (mat->rmap->n) { 334 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 335 } 336 if (rank) { 337 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 338 } 339 } 340 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 341 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 342 PetscFunctionReturn(0); 343 } 344 345 /* 346 Local utility routine that creates a mapping from the global column 347 number to the local number in the off-diagonal part of the local 348 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 349 a slightly higher hash table cost; without it it is not scalable (each processor 350 has an order N integer array but is fast to acess. 
351 */ 352 #undef __FUNCT__ 353 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 354 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 355 { 356 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 357 PetscErrorCode ierr; 358 PetscInt n = aij->B->cmap->n,i; 359 360 PetscFunctionBegin; 361 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 362 #if defined(PETSC_USE_CTABLE) 363 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 364 for (i=0; i<n; i++) { 365 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 366 } 367 #else 368 ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr); 369 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 370 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 371 #endif 372 PetscFunctionReturn(0); 373 } 374 375 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 376 { \ 377 if (col <= lastcol1) low1 = 0; \ 378 else high1 = nrow1; \ 379 lastcol1 = col;\ 380 while (high1-low1 > 5) { \ 381 t = (low1+high1)/2; \ 382 if (rp1[t] > col) high1 = t; \ 383 else low1 = t; \ 384 } \ 385 for (_i=low1; _i<high1; _i++) { \ 386 if (rp1[_i] > col) break; \ 387 if (rp1[_i] == col) { \ 388 if (addv == ADD_VALUES) ap1[_i] += value; \ 389 else ap1[_i] = value; \ 390 goto a_noinsert; \ 391 } \ 392 } \ 393 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 394 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 395 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 396 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 397 N = nrow1++ - 1; a->nz++; high1++; \ 398 /* shift up all the later entries in this row */ \ 399 for (ii=N; ii>=_i; ii--) { \ 400 rp1[ii+1] = rp1[ii]; \ 401 ap1[ii+1] = ap1[ii]; \ 402 } \ 403 rp1[_i] = col; \ 404 ap1[_i] = value; \ 405 A->nonzerostate++;\ 406 a_noinsert: ; \ 407 ailen[row] = nrow1; \ 408 } 409 410 411 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 412 { \ 413 if (col <= lastcol2) low2 = 0; \ 414 else high2 = nrow2; \ 415 lastcol2 = col; \ 416 while (high2-low2 > 5) { \ 417 t = (low2+high2)/2; \ 418 if (rp2[t] > col) high2 = t; \ 419 else low2 = t; \ 420 } \ 421 for (_i=low2; _i<high2; _i++) { \ 422 if (rp2[_i] > col) break; \ 423 if (rp2[_i] == col) { \ 424 if (addv == ADD_VALUES) ap2[_i] += value; \ 425 else ap2[_i] = value; \ 426 goto b_noinsert; \ 427 } \ 428 } \ 429 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 430 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 431 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 432 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 433 N = nrow2++ - 1; b->nz++; high2++; \ 434 /* shift up all the later entries in this row */ \ 435 for (ii=N; ii>=_i; ii--) { \ 436 rp2[ii+1] = rp2[ii]; \ 437 ap2[ii+1] = ap2[ii]; \ 438 } \ 439 rp2[_i] = col; \ 440 ap2[_i] = value; \ 441 B->nonzerostate++; \ 442 b_noinsert: ; \ 443 bilen[row] = nrow2; \ 444 } 445 446 #undef __FUNCT__ 447 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 448 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 449 { 450 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 451 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = 
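/*
   Sketch (illustrative only) of the colmap convention established in MatCreateColmap_MPIAIJ_Private()
   above: entries are stored shifted by one so that a lookup result of 0 can mean "this global column has
   no slot in the off-diagonal block B". Translating a global column gcol to a local B column (lcol is a
   hypothetical variable name) therefore reads

     #if defined(PETSC_USE_CTABLE)
       ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
       lcol--;
     #else
       lcol = aij->colmap[gcol] - 1;
     #endif

   and lcol < 0 afterwards signals a column that is not yet present in B. MatSetValues_MPIAIJ() and
   MatGetValues_MPIAIJ() below rely on exactly this -1 convention.
*/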
(Mat_SeqAIJ*)mat->B->data; 452 PetscErrorCode ierr; 453 PetscInt l,*garray = mat->garray,diag; 454 455 PetscFunctionBegin; 456 /* code only works for square matrices A */ 457 458 /* find size of row to the left of the diagonal part */ 459 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 460 row = row - diag; 461 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 462 if (garray[b->j[b->i[row]+l]] > diag) break; 463 } 464 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 465 466 /* diagonal part */ 467 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 468 469 /* right of diagonal part */ 470 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 471 PetscFunctionReturn(0); 472 } 473 474 #undef __FUNCT__ 475 #define __FUNCT__ "MatSetValues_MPIAIJ" 476 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 477 { 478 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 479 PetscScalar value; 480 PetscErrorCode ierr; 481 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 482 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 483 PetscBool roworiented = aij->roworiented; 484 485 /* Some Variables required in the macro */ 486 Mat A = aij->A; 487 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 488 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 489 MatScalar *aa = a->a; 490 PetscBool ignorezeroentries = a->ignorezeroentries; 491 Mat B = aij->B; 492 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 493 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 494 MatScalar *ba = b->a; 495 496 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 497 PetscInt nonew; 498 MatScalar *ap1,*ap2; 499 500 PetscFunctionBegin; 501 for (i=0; i<m; i++) { 502 if (im[i] < 0) continue; 503 #if defined(PETSC_USE_DEBUG) 504 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 505 #endif 506 if (im[i] >= rstart && im[i] < rend) { 507 row = im[i] - rstart; 508 lastcol1 = -1; 509 rp1 = aj + ai[row]; 510 ap1 = aa + ai[row]; 511 rmax1 = aimax[row]; 512 nrow1 = ailen[row]; 513 low1 = 0; 514 high1 = nrow1; 515 lastcol2 = -1; 516 rp2 = bj + bi[row]; 517 ap2 = ba + bi[row]; 518 rmax2 = bimax[row]; 519 nrow2 = bilen[row]; 520 low2 = 0; 521 high2 = nrow2; 522 523 for (j=0; j<n; j++) { 524 if (roworiented) value = v[i*n+j]; 525 else value = v[i+j*m]; 526 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 527 if (in[j] >= cstart && in[j] < cend) { 528 col = in[j] - cstart; 529 nonew = a->nonew; 530 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 531 } else if (in[j] < 0) continue; 532 #if defined(PETSC_USE_DEBUG) 533 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 534 #endif 535 else { 536 if (mat->was_assembled) { 537 if (!aij->colmap) { 538 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 539 } 540 #if defined(PETSC_USE_CTABLE) 541 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 542 col--; 543 #else 544 col = aij->colmap[in[j]] - 1; 545 #endif 546 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 547 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 548 col = in[j]; 549 /* 
Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 550 B = aij->B; 551 b = (Mat_SeqAIJ*)B->data; 552 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 553 rp2 = bj + bi[row]; 554 ap2 = ba + bi[row]; 555 rmax2 = bimax[row]; 556 nrow2 = bilen[row]; 557 low2 = 0; 558 high2 = nrow2; 559 bm = aij->B->rmap->n; 560 ba = b->a; 561 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 562 } else col = in[j]; 563 nonew = b->nonew; 564 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 565 } 566 } 567 } else { 568 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 569 if (!aij->donotstash) { 570 mat->assembled = PETSC_FALSE; 571 if (roworiented) { 572 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 573 } else { 574 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 575 } 576 } 577 } 578 } 579 PetscFunctionReturn(0); 580 } 581 582 #undef __FUNCT__ 583 #define __FUNCT__ "MatGetValues_MPIAIJ" 584 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 585 { 586 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 587 PetscErrorCode ierr; 588 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 589 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 590 591 PetscFunctionBegin; 592 for (i=0; i<m; i++) { 593 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 594 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 595 if (idxm[i] >= rstart && idxm[i] < rend) { 596 row = idxm[i] - rstart; 597 for (j=0; j<n; j++) { 598 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 599 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 600 if (idxn[j] >= cstart && idxn[j] < cend) { 601 col = idxn[j] - cstart; 602 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 603 } else { 604 if (!aij->colmap) { 605 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 606 } 607 #if defined(PETSC_USE_CTABLE) 608 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 609 col--; 610 #else 611 col = aij->colmap[idxn[j]] - 1; 612 #endif 613 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 614 else { 615 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 616 } 617 } 618 } 619 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 620 } 621 PetscFunctionReturn(0); 622 } 623 624 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 625 626 #undef __FUNCT__ 627 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 628 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 629 { 630 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 631 PetscErrorCode ierr; 632 PetscInt nstash,reallocs; 633 InsertMode addv; 634 635 PetscFunctionBegin; 636 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 637 638 /* make sure all processors are either in INSERTMODE or 
ADDMODE */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble.
*/ 691 /* 692 if nonzero structure of submatrix B cannot change then we know that 693 no processor disassembled thus we can skip this stuff 694 */ 695 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 696 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 697 if (mat->was_assembled && !other_disassembled) { 698 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 699 } 700 } 701 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 702 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 703 } 704 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 705 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 706 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 707 708 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 709 710 aij->rowvalues = 0; 711 712 /* used by MatAXPY() */ 713 a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 714 a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 715 716 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 717 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 718 719 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 720 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 721 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 722 ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 723 } 724 PetscFunctionReturn(0); 725 } 726 727 #undef __FUNCT__ 728 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 729 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 730 { 731 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 732 PetscErrorCode ierr; 733 734 PetscFunctionBegin; 735 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 736 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 737 PetscFunctionReturn(0); 738 } 739 740 #undef __FUNCT__ 741 #define __FUNCT__ "MatZeroRows_MPIAIJ" 742 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 743 { 744 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 745 PetscInt *owners = A->rmap->range; 746 PetscInt n = A->rmap->n; 747 PetscSF sf; 748 PetscInt *lrows; 749 PetscSFNode *rrows; 750 PetscInt r, p = 0, len = 0; 751 PetscErrorCode ierr; 752 753 PetscFunctionBegin; 754 /* Create SF where leaves are input rows and roots are owned rows */ 755 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 756 for (r = 0; r < n; ++r) lrows[r] = -1; 757 if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);} 758 for (r = 0; r < N; ++r) { 759 const PetscInt idx = rows[r]; 760 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 761 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 762 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 763 } 764 if (A->nooffproczerorows) { 765 if (p != mat->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,mat->rank); 766 lrows[len++] = idx - owners[p]; 767 } else { 768 rrows[r].rank = p; 769 rrows[r].index = rows[r] - owners[p]; 770 } 771 } 772 if (!A->nooffproczerorows) { 773 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 774 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, 
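/*
   Usage sketch (illustrative, not part of the library) for the routine being built here,
   MatZeroRows_MPIAIJ(). From user code the call typically looks like

     PetscInt    rows[] = {0, 5, 17};   example global row numbers; any process may list any row
     PetscScalar diag   = 1.0;
     ierr = MatZeroRows(A,3,rows,diag,x,b);CHKERRQ(ierr);

   where x and b may be NULL. When they are provided, b is adjusted so that b[row] = diag*x[row] for
   every zeroed row, which is what the "fix right hand side" block below does for the locally owned rows.
*/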
PETSC_OWN_POINTER);CHKERRQ(ierr); 775 /* Collect flags for rows to be zeroed */ 776 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr); 777 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr); 778 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 779 /* Compress and put in row numbers */ 780 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 781 } 782 /* fix right hand side if needed */ 783 if (x && b) { 784 const PetscScalar *xx; 785 PetscScalar *bb; 786 787 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 788 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 789 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 790 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 791 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 792 } 793 /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/ 794 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 795 if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) { 796 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 797 } else if (diag != 0.0) { 798 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 799 if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 800 for (r = 0; r < len; ++r) { 801 const PetscInt row = lrows[r] + A->rmap->rstart; 802 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 803 } 804 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 805 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 806 } else { 807 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 808 } 809 ierr = PetscFree(lrows);CHKERRQ(ierr); 810 811 /* only change matrix nonzero state if pattern was allowed to be changed */ 812 if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) { 813 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 814 ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 815 } 816 PetscFunctionReturn(0); 817 } 818 819 #undef __FUNCT__ 820 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 821 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 822 { 823 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 824 PetscErrorCode ierr; 825 PetscMPIInt n = A->rmap->n; 826 PetscInt i,j,r,m,p = 0,len = 0; 827 PetscInt *lrows,*owners = A->rmap->range; 828 PetscSFNode *rrows; 829 PetscSF sf; 830 const PetscScalar *xx; 831 PetscScalar *bb,*mask; 832 Vec xmask,lmask; 833 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 834 const PetscInt *aj, *ii,*ridx; 835 PetscScalar *aa; 836 837 PetscFunctionBegin; 838 /* Create SF where leaves are input rows and roots are owned rows */ 839 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 840 for (r = 0; r < n; ++r) lrows[r] = -1; 841 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 842 for (r = 0; r < N; ++r) { 843 const PetscInt idx = rows[r]; 844 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 845 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 846 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 847 } 848 rrows[r].rank = p; 849 rrows[r].index = rows[r] - 
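/*
   Sketch (illustrative only) of the PetscSF pattern used by both MatZeroRows_MPIAIJ() and
   MatZeroRowsColumns_MPIAIJ(): every requested global row becomes a leaf pointing at the owning rank,
   and a reduction with MPI_LOR marks the locally owned targets, assuming the root array was preset
   to -1. With generic placeholder names (nroots, nleaves, remote, leafdata, rootdata standing in for
   the n, N, rrows, rows and lrows used in the code):

     ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
     ierr = PetscSFSetGraph(sf,nroots,nleaves,NULL,PETSC_OWN_POINTER,remote,PETSC_OWN_POINTER);CHKERRQ(ierr);
     ierr = PetscSFReduceBegin(sf,MPIU_INT,leafdata,rootdata,MPI_LOR);CHKERRQ(ierr);
     ierr = PetscSFReduceEnd(sf,MPIU_INT,leafdata,rootdata,MPI_LOR);CHKERRQ(ierr);
     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

   Afterwards rootdata[r] >= 0 flags local row r, and the compression loop rewrites the array into a
   dense list of local row indices.
*/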
owners[p]; 850 } 851 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 852 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 853 /* Collect flags for rows to be zeroed */ 854 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 855 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 856 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 857 /* Compress and put in row numbers */ 858 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 859 /* zero diagonal part of matrix */ 860 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 861 /* handle off diagonal part of matrix */ 862 ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr); 863 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 864 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 865 for (i=0; i<len; i++) bb[lrows[i]] = 1; 866 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 867 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 868 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 869 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 870 if (x) { 871 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 872 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 873 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 874 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 875 } 876 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 877 /* remove zeroed rows of off diagonal matrix */ 878 ii = aij->i; 879 for (i=0; i<len; i++) { 880 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 881 } 882 /* loop over all elements of off process part of matrix zeroing removed columns*/ 883 if (aij->compressedrow.use) { 884 m = aij->compressedrow.nrows; 885 ii = aij->compressedrow.i; 886 ridx = aij->compressedrow.rindex; 887 for (i=0; i<m; i++) { 888 n = ii[i+1] - ii[i]; 889 aj = aij->j + ii[i]; 890 aa = aij->a + ii[i]; 891 892 for (j=0; j<n; j++) { 893 if (PetscAbsScalar(mask[*aj])) { 894 if (b) bb[*ridx] -= *aa*xx[*aj]; 895 *aa = 0.0; 896 } 897 aa++; 898 aj++; 899 } 900 ridx++; 901 } 902 } else { /* do not use compressed row format */ 903 m = l->B->rmap->n; 904 for (i=0; i<m; i++) { 905 n = ii[i+1] - ii[i]; 906 aj = aij->j + ii[i]; 907 aa = aij->a + ii[i]; 908 for (j=0; j<n; j++) { 909 if (PetscAbsScalar(mask[*aj])) { 910 if (b) bb[i] -= *aa*xx[*aj]; 911 *aa = 0.0; 912 } 913 aa++; 914 aj++; 915 } 916 } 917 } 918 if (x) { 919 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 920 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 921 } 922 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 923 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 924 ierr = PetscFree(lrows);CHKERRQ(ierr); 925 926 /* only change matrix nonzero state if pattern was allowed to be changed */ 927 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 928 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 929 ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 930 } 931 PetscFunctionReturn(0); 932 } 933 934 #undef __FUNCT__ 935 #define __FUNCT__ "MatMult_MPIAIJ" 936 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 937 { 938 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 939 PetscErrorCode ierr; 940 PetscInt nt; 941 942 PetscFunctionBegin; 943 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 944 if (nt != 
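/*
   The multiply routines below all use the same split-matrix scheme. With the diagonal block a->A and the
   off-diagonal block a->B (whose columns are compressed through garray), the product y = A*x is computed,
   schematically, as

     y_local = A_diag * x_local + B_off * x_ghost

   The scatter that fills the ghost values a->lvec is started first, the purely local product is performed
   while those messages are in flight, and the scatter is completed only just before the B_off contribution
   is added; hence the VecScatterBegin / local mult / VecScatterEnd / multadd ordering in MatMult_MPIAIJ()
   and MatMultAdd_MPIAIJ().
*/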
A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 945 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 946 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 947 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 948 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 949 PetscFunctionReturn(0); 950 } 951 952 #undef __FUNCT__ 953 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 954 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 955 { 956 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 957 PetscErrorCode ierr; 958 959 PetscFunctionBegin; 960 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 961 PetscFunctionReturn(0); 962 } 963 964 #undef __FUNCT__ 965 #define __FUNCT__ "MatMultAdd_MPIAIJ" 966 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 967 { 968 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 969 PetscErrorCode ierr; 970 971 PetscFunctionBegin; 972 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 973 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 974 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 975 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 976 PetscFunctionReturn(0); 977 } 978 979 #undef __FUNCT__ 980 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 981 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 982 { 983 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 984 PetscErrorCode ierr; 985 PetscBool merged; 986 987 PetscFunctionBegin; 988 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 989 /* do nondiagonal part */ 990 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 991 if (!merged) { 992 /* send it on its way */ 993 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 994 /* do local part */ 995 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 996 /* receive remote parts: note this assumes the values are not actually */ 997 /* added in yy until the next line, */ 998 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 999 } else { 1000 /* do local part */ 1001 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1002 /* send it on its way */ 1003 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1004 /* values actually were received in the Begin() but we need to call this nop */ 1005 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1006 } 1007 PetscFunctionReturn(0); 1008 } 1009 1010 #undef __FUNCT__ 1011 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1012 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1013 { 1014 MPI_Comm comm; 1015 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1016 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1017 IS Me,Notme; 1018 PetscErrorCode ierr; 1019 PetscInt M,N,first,last,*notme,i; 1020 PetscMPIInt size; 1021 1022 PetscFunctionBegin; 1023 /* Easy test: symmetric diagonal block */ 1024 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1025 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1026 if (!*f) PetscFunctionReturn(0); 1027 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1028 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1029 if (size == 1) PetscFunctionReturn(0); 1030 1031 /* Hard test: off-diagonal block. 
This takes a MatGetSubMatrix. */ 1032 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1033 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1034 ierr = PetscMalloc1((N-last+first),¬me);CHKERRQ(ierr); 1035 for (i=0; i<first; i++) notme[i] = i; 1036 for (i=last; i<M; i++) notme[i-last+first] = i; 1037 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1038 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1039 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1040 Aoff = Aoffs[0]; 1041 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1042 Boff = Boffs[0]; 1043 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1044 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1045 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1046 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1047 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1048 ierr = PetscFree(notme);CHKERRQ(ierr); 1049 PetscFunctionReturn(0); 1050 } 1051 1052 #undef __FUNCT__ 1053 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1054 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1055 { 1056 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1057 PetscErrorCode ierr; 1058 1059 PetscFunctionBegin; 1060 /* do nondiagonal part */ 1061 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1062 /* send it on its way */ 1063 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1064 /* do local part */ 1065 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1066 /* receive remote parts */ 1067 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1068 PetscFunctionReturn(0); 1069 } 1070 1071 /* 1072 This only works correctly for square matrices where the subblock A->A is the 1073 diagonal block 1074 */ 1075 #undef __FUNCT__ 1076 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1077 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1078 { 1079 PetscErrorCode ierr; 1080 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1081 1082 PetscFunctionBegin; 1083 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1084 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1085 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1086 PetscFunctionReturn(0); 1087 } 1088 1089 #undef __FUNCT__ 1090 #define __FUNCT__ "MatScale_MPIAIJ" 1091 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1092 { 1093 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1094 PetscErrorCode ierr; 1095 1096 PetscFunctionBegin; 1097 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1098 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1099 PetscFunctionReturn(0); 1100 } 1101 1102 #undef __FUNCT__ 1103 #define __FUNCT__ "MatDestroy_Redundant" 1104 PetscErrorCode MatDestroy_Redundant(Mat_Redundant **redundant) 1105 { 1106 PetscErrorCode ierr; 1107 Mat_Redundant *redund = *redundant; 1108 PetscInt i; 1109 1110 PetscFunctionBegin; 1111 *redundant = NULL; 1112 if (redund){ 1113 if (redund->matseq) { /* via MatGetSubMatrices() */ 1114 ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr); 1115 ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr); 1116 ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr); 1117 ierr = PetscFree(redund->matseq);CHKERRQ(ierr); 1118 } else { 1119 ierr = 
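/*
   Summary note (illustrative, no added functionality) on MatIsTranspose_MPIAIJ() above: after the cheap
   test that each diagonal block satisfies A_diag == transpose(B_diag), the "hard test" extracts the
   off-process column blocks with MatGetSubMatrices(), using Me = the locally owned rows and Notme = all
   other columns, and checks

     A(Me,Notme) == transpose( B(Notme,Me) )

   with the sequential MatIsTranspose() on the extracted pieces, so the expensive extraction is skipped
   whenever the diagonal-block test has already failed.
*/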
PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 1120 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 1121 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 1122 for (i=0; i<redund->nrecvs; i++) { 1123 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 1124 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 1125 } 1126 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 1127 } 1128 1129 if (redund->psubcomm) { 1130 ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr); 1131 } 1132 ierr = PetscFree(redund);CHKERRQ(ierr); 1133 } 1134 PetscFunctionReturn(0); 1135 } 1136 1137 #undef __FUNCT__ 1138 #define __FUNCT__ "MatDestroy_MPIAIJ" 1139 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1140 { 1141 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1142 PetscErrorCode ierr; 1143 1144 PetscFunctionBegin; 1145 #if defined(PETSC_USE_LOG) 1146 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1147 #endif 1148 ierr = MatDestroy_Redundant(&aij->redundant);CHKERRQ(ierr); 1149 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1150 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1151 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1152 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1153 #if defined(PETSC_USE_CTABLE) 1154 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1155 #else 1156 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1157 #endif 1158 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1159 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1160 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1161 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1162 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1163 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1164 1165 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1166 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1167 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1168 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 1169 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1170 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1171 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1172 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1173 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1174 PetscFunctionReturn(0); 1175 } 1176 1177 #undef __FUNCT__ 1178 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1179 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1180 { 1181 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1182 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1183 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1184 PetscErrorCode ierr; 1185 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1186 int fd; 1187 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1188 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1189 PetscScalar *column_values; 1190 PetscInt message_count,flowcontrolcount; 1191 FILE *file; 1192 1193 PetscFunctionBegin; 1194 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1195 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1196 nz = A->nz + 
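/*
   Layout sketch of the file produced by MatView_MPIAIJ_Binary() below (a summary of what the code
   writes, in PetscBinaryWrite() order):

     header[4]        = { MAT_FILE_CLASSID, M, N, total number of nonzeros }
     row_lengths[M]   = nonzeros per global row (A part plus B part)
     column_indices[] = global column indices, row by row, in ascending column order
     column_values[]  = the matching numerical values in the same order

   Rank 0 performs all the writes; the other ranks ship their pieces to rank 0 using the
   PetscViewerFlowControlStart/StepMaster/StepWorker pattern repeated in each of the three stages.
*/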
B->nz; 1197 if (!rank) { 1198 header[0] = MAT_FILE_CLASSID; 1199 header[1] = mat->rmap->N; 1200 header[2] = mat->cmap->N; 1201 1202 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1203 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1204 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1205 /* get largest number of rows any processor has */ 1206 rlen = mat->rmap->n; 1207 range = mat->rmap->range; 1208 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1209 } else { 1210 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1211 rlen = mat->rmap->n; 1212 } 1213 1214 /* load up the local row counts */ 1215 ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr); 1216 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1217 1218 /* store the row lengths to the file */ 1219 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1220 if (!rank) { 1221 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1222 for (i=1; i<size; i++) { 1223 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1224 rlen = range[i+1] - range[i]; 1225 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1226 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1227 } 1228 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1229 } else { 1230 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1231 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1232 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1233 } 1234 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1235 1236 /* load up the local column indices */ 1237 nzmax = nz; /* th processor needs space a largest processor needs */ 1238 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1239 ierr = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr); 1240 cnt = 0; 1241 for (i=0; i<mat->rmap->n; i++) { 1242 for (j=B->i[i]; j<B->i[i+1]; j++) { 1243 if ((col = garray[B->j[j]]) > cstart) break; 1244 column_indices[cnt++] = col; 1245 } 1246 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1247 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1248 } 1249 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1250 1251 /* store the column indices to the file */ 1252 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1253 if (!rank) { 1254 MPI_Status status; 1255 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1256 for (i=1; i<size; i++) { 1257 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1258 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1259 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1260 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1261 ierr = 
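/*
   Worked example (illustrative only) of the column-index merge above, which is repeated below for the
   numerical values. Suppose on this process cstart = 4, the diagonal block row has local columns {0, 2}
   and the off-diagonal block row has global columns (through garray) {1, 9}. The three loops emit

     B columns below cstart : 1
     A columns plus cstart  : 4, 6
     remaining B columns    : 9

   that is, the row's global columns {1, 4, 6, 9} in ascending order, which is the ordering stored in
   the binary file.
*/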
PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1262 } 1263 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1264 } else { 1265 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1266 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1267 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1268 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1269 } 1270 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1271 1272 /* load up the local column values */ 1273 ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr); 1274 cnt = 0; 1275 for (i=0; i<mat->rmap->n; i++) { 1276 for (j=B->i[i]; j<B->i[i+1]; j++) { 1277 if (garray[B->j[j]] > cstart) break; 1278 column_values[cnt++] = B->a[j]; 1279 } 1280 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1281 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1282 } 1283 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1284 1285 /* store the column values to the file */ 1286 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1287 if (!rank) { 1288 MPI_Status status; 1289 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1290 for (i=1; i<size; i++) { 1291 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1292 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1293 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1294 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1295 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1296 } 1297 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1298 } else { 1299 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1300 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1301 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1302 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1303 } 1304 ierr = PetscFree(column_values);CHKERRQ(ierr); 1305 1306 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1307 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1308 PetscFunctionReturn(0); 1309 } 1310 1311 #include <petscdraw.h> 1312 #undef __FUNCT__ 1313 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1314 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1315 { 1316 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1317 PetscErrorCode ierr; 1318 PetscMPIInt rank = aij->rank,size = aij->size; 1319 PetscBool isdraw,iascii,isbinary; 1320 PetscViewer sviewer; 1321 PetscViewerFormat format; 1322 1323 PetscFunctionBegin; 1324 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1325 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1326 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1327 if (iascii) { 1328 ierr = 
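/*
   Usage sketch (illustrative, not part of the library): the viewer logic that follows is what runs
   behind explicit MatView() calls and the matrix viewing options, for example

     ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
     -mat_view ::ascii_info_detail      prints the per-rank nonzero and I-node summary generated below
     -mat_view binary:matrix.dat        goes through MatView_MPIAIJ_Binary() above

   For a parallel matrix the ASCII and draw paths assemble the entire matrix onto rank 0 before viewing
   (see the block at the end of this routine), so they are only practical for small matrices.
*/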
PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1329 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1330 MatInfo info; 1331 PetscBool inodes; 1332 1333 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1334 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1335 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1336 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1337 if (!inodes) { 1338 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1339 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1340 } else { 1341 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1342 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1343 } 1344 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1345 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1346 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1347 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1348 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1349 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1350 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1351 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1352 PetscFunctionReturn(0); 1353 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1354 PetscInt inodecount,inodelimit,*inodes; 1355 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1356 if (inodes) { 1357 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1358 } else { 1359 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1360 } 1361 PetscFunctionReturn(0); 1362 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1363 PetscFunctionReturn(0); 1364 } 1365 } else if (isbinary) { 1366 if (size == 1) { 1367 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1368 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1369 } else { 1370 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1371 } 1372 PetscFunctionReturn(0); 1373 } else if (isdraw) { 1374 PetscDraw draw; 1375 PetscBool isnull; 1376 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1377 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1378 } 1379 1380 { 1381 /* assemble the entire matrix onto first processor. 
*/ 1382 Mat A; 1383 Mat_SeqAIJ *Aloc; 1384 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1385 MatScalar *a; 1386 1387 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1388 if (!rank) { 1389 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1390 } else { 1391 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1392 } 1393 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1394 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1395 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1396 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1397 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1398 1399 /* copy over the A part */ 1400 Aloc = (Mat_SeqAIJ*)aij->A->data; 1401 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1402 row = mat->rmap->rstart; 1403 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1404 for (i=0; i<m; i++) { 1405 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1406 row++; 1407 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1408 } 1409 aj = Aloc->j; 1410 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1411 1412 /* copy over the B part */ 1413 Aloc = (Mat_SeqAIJ*)aij->B->data; 1414 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1415 row = mat->rmap->rstart; 1416 ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr); 1417 ct = cols; 1418 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1419 for (i=0; i<m; i++) { 1420 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1421 row++; 1422 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1423 } 1424 ierr = PetscFree(ct);CHKERRQ(ierr); 1425 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1426 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1427 /* 1428 Everyone has to call to draw the matrix since the graphics waits are 1429 synchronized across all processors that share the PetscDraw object 1430 */ 1431 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1432 if (!rank) { 1433 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1434 } 1435 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1436 ierr = MatDestroy(&A);CHKERRQ(ierr); 1437 } 1438 PetscFunctionReturn(0); 1439 } 1440 1441 #undef __FUNCT__ 1442 #define __FUNCT__ "MatView_MPIAIJ" 1443 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1444 { 1445 PetscErrorCode ierr; 1446 PetscBool iascii,isdraw,issocket,isbinary; 1447 1448 PetscFunctionBegin; 1449 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1450 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1451 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1452 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1453 if (iascii || isdraw || isbinary || issocket) { 1454 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1455 } 1456 PetscFunctionReturn(0); 1457 } 1458 1459 #undef __FUNCT__ 1460 #define __FUNCT__ "MatSOR_MPIAIJ" 1461 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1462 { 1463 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1464 PetscErrorCode ierr; 1465 Vec bb1 = 0; 1466 PetscBool hasop; 1467 1468 PetscFunctionBegin; 1469 if (flag == SOR_APPLY_UPPER) { 1470 ierr 
= (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1471 PetscFunctionReturn(0); 1472 } 1473 1474 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1475 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1476 } 1477 1478 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1479 if (flag & SOR_ZERO_INITIAL_GUESS) { 1480 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1481 its--; 1482 } 1483 1484 while (its--) { 1485 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1486 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1487 1488 /* update rhs: bb1 = bb - B*x */ 1489 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1490 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1491 1492 /* local sweep */ 1493 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1494 } 1495 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1496 if (flag & SOR_ZERO_INITIAL_GUESS) { 1497 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1498 its--; 1499 } 1500 while (its--) { 1501 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1502 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1503 1504 /* update rhs: bb1 = bb - B*x */ 1505 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1506 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1507 1508 /* local sweep */ 1509 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1510 } 1511 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1512 if (flag & SOR_ZERO_INITIAL_GUESS) { 1513 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1514 its--; 1515 } 1516 while (its--) { 1517 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1518 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1519 1520 /* update rhs: bb1 = bb - B*x */ 1521 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1522 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1523 1524 /* local sweep */ 1525 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1526 } 1527 } else if (flag & SOR_EISENSTAT) { 1528 Vec xx1; 1529 1530 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1531 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1532 1533 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1534 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1535 if (!mat->diag) { 1536 ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1537 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1538 } 1539 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1540 if (hasop) { 1541 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1542 } else { 1543 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1544 } 1545 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1546 1547 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1548 1549 /* local sweep */ 1550 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | 
SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1551 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1552 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1553 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1554 1555 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1556 PetscFunctionReturn(0); 1557 } 1558 1559 #undef __FUNCT__ 1560 #define __FUNCT__ "MatPermute_MPIAIJ" 1561 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1562 { 1563 Mat aA,aB,Aperm; 1564 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1565 PetscScalar *aa,*ba; 1566 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1567 PetscSF rowsf,sf; 1568 IS parcolp = NULL; 1569 PetscBool done; 1570 PetscErrorCode ierr; 1571 1572 PetscFunctionBegin; 1573 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1574 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1575 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1576 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1577 1578 /* Invert row permutation to find out where my rows should go */ 1579 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1580 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1581 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1582 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1583 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1584 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1585 1586 /* Invert column permutation to find out where my columns should go */ 1587 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1588 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1589 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1590 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1591 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1592 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1593 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1594 1595 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1596 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1597 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1598 1599 /* Find out where my gcols should go */ 1600 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1601 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1602 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1603 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1604 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1605 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1606 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1607 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1608 1609 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1610 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1611 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1612 for (i=0; i<m; i++) { 1613 PetscInt row = rdest[i],rowner; 1614 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1615 for (j=ai[i]; j<ai[i+1]; j++) { 1616 PetscInt cowner,col = cdest[aj[j]]; 1617 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1618 if (rowner == cowner) dnnz[i]++; 1619 
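        /* added note (not in the original source): with the permuted row rdest[i] and permuted column cdest[aj[j]], an entry is counted toward the diagonal-block preallocation (dnnz) only when both end up owned by the same process; otherwise it goes to onnz just below */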
else onnz[i]++; 1620 } 1621 for (j=bi[i]; j<bi[i+1]; j++) { 1622 PetscInt cowner,col = gcdest[bj[j]]; 1623 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1624 if (rowner == cowner) dnnz[i]++; 1625 else onnz[i]++; 1626 } 1627 } 1628 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1629 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1630 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1631 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1632 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1633 1634 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1635 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1636 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1637 for (i=0; i<m; i++) { 1638 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1639 PetscInt j0,rowlen; 1640 rowlen = ai[i+1] - ai[i]; 1641 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1642 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1643 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1644 } 1645 rowlen = bi[i+1] - bi[i]; 1646 for (j0=j=0; j<rowlen; j0=j) { 1647 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1648 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1649 } 1650 } 1651 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1652 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1653 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1654 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1655 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1656 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1657 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1658 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1659 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1660 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1661 *B = Aperm; 1662 PetscFunctionReturn(0); 1663 } 1664 1665 #undef __FUNCT__ 1666 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1667 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1668 { 1669 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1670 Mat A = mat->A,B = mat->B; 1671 PetscErrorCode ierr; 1672 PetscReal isend[5],irecv[5]; 1673 1674 PetscFunctionBegin; 1675 info->block_size = 1.0; 1676 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1677 1678 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1679 isend[3] = info->memory; isend[4] = info->mallocs; 1680 1681 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1682 1683 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1684 isend[3] += info->memory; isend[4] += info->mallocs; 1685 if (flag == MAT_LOCAL) { 1686 info->nz_used = isend[0]; 1687 info->nz_allocated = isend[1]; 1688 info->nz_unneeded = isend[2]; 1689 info->memory = isend[3]; 1690 info->mallocs = isend[4]; 1691 } else if (flag == MAT_GLOBAL_MAX) { 1692 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1693 1694 info->nz_used = irecv[0]; 1695 info->nz_allocated = irecv[1]; 1696 info->nz_unneeded = irecv[2]; 1697 info->memory = irecv[3]; 1698 info->mallocs = irecv[4]; 1699 } else 
if (flag == MAT_GLOBAL_SUM) { 1700 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1701 1702 info->nz_used = irecv[0]; 1703 info->nz_allocated = irecv[1]; 1704 info->nz_unneeded = irecv[2]; 1705 info->memory = irecv[3]; 1706 info->mallocs = irecv[4]; 1707 } 1708 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1709 info->fill_ratio_needed = 0; 1710 info->factor_mallocs = 0; 1711 PetscFunctionReturn(0); 1712 } 1713 1714 #undef __FUNCT__ 1715 #define __FUNCT__ "MatSetOption_MPIAIJ" 1716 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1717 { 1718 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1719 PetscErrorCode ierr; 1720 1721 PetscFunctionBegin; 1722 switch (op) { 1723 case MAT_NEW_NONZERO_LOCATIONS: 1724 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1725 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1726 case MAT_KEEP_NONZERO_PATTERN: 1727 case MAT_NEW_NONZERO_LOCATION_ERR: 1728 case MAT_USE_INODES: 1729 case MAT_IGNORE_ZERO_ENTRIES: 1730 MatCheckPreallocated(A,1); 1731 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1732 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1733 break; 1734 case MAT_ROW_ORIENTED: 1735 a->roworiented = flg; 1736 1737 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1738 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1739 break; 1740 case MAT_NEW_DIAGONALS: 1741 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1742 break; 1743 case MAT_IGNORE_OFF_PROC_ENTRIES: 1744 a->donotstash = flg; 1745 break; 1746 case MAT_SPD: 1747 A->spd_set = PETSC_TRUE; 1748 A->spd = flg; 1749 if (flg) { 1750 A->symmetric = PETSC_TRUE; 1751 A->structurally_symmetric = PETSC_TRUE; 1752 A->symmetric_set = PETSC_TRUE; 1753 A->structurally_symmetric_set = PETSC_TRUE; 1754 } 1755 break; 1756 case MAT_SYMMETRIC: 1757 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1758 break; 1759 case MAT_STRUCTURALLY_SYMMETRIC: 1760 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1761 break; 1762 case MAT_HERMITIAN: 1763 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1764 break; 1765 case MAT_SYMMETRY_ETERNAL: 1766 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1767 break; 1768 default: 1769 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1770 } 1771 PetscFunctionReturn(0); 1772 } 1773 1774 #undef __FUNCT__ 1775 #define __FUNCT__ "MatGetRow_MPIAIJ" 1776 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1777 { 1778 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1779 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1780 PetscErrorCode ierr; 1781 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1782 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1783 PetscInt *cmap,*idx_p; 1784 1785 PetscFunctionBegin; 1786 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1787 mat->getrowactive = PETSC_TRUE; 1788 1789 if (!mat->rowvalues && (idx || v)) { 1790 /* 1791 allocate enough space to hold information from the longest row. 
1792 */ 1793 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1794 PetscInt max = 1,tmp; 1795 for (i=0; i<matin->rmap->n; i++) { 1796 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1797 if (max < tmp) max = tmp; 1798 } 1799 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1800 } 1801 1802 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1803 lrow = row - rstart; 1804 1805 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1806 if (!v) {pvA = 0; pvB = 0;} 1807 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1808 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1809 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1810 nztot = nzA + nzB; 1811 1812 cmap = mat->garray; 1813 if (v || idx) { 1814 if (nztot) { 1815 /* Sort by increasing column numbers, assuming A and B already sorted */ 1816 PetscInt imark = -1; 1817 if (v) { 1818 *v = v_p = mat->rowvalues; 1819 for (i=0; i<nzB; i++) { 1820 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1821 else break; 1822 } 1823 imark = i; 1824 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1825 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1826 } 1827 if (idx) { 1828 *idx = idx_p = mat->rowindices; 1829 if (imark > -1) { 1830 for (i=0; i<imark; i++) { 1831 idx_p[i] = cmap[cworkB[i]]; 1832 } 1833 } else { 1834 for (i=0; i<nzB; i++) { 1835 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1836 else break; 1837 } 1838 imark = i; 1839 } 1840 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1841 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1842 } 1843 } else { 1844 if (idx) *idx = 0; 1845 if (v) *v = 0; 1846 } 1847 } 1848 *nz = nztot; 1849 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1850 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1851 PetscFunctionReturn(0); 1852 } 1853 1854 #undef __FUNCT__ 1855 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1856 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1857 { 1858 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1859 1860 PetscFunctionBegin; 1861 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1862 aij->getrowactive = PETSC_FALSE; 1863 PetscFunctionReturn(0); 1864 } 1865 1866 #undef __FUNCT__ 1867 #define __FUNCT__ "MatNorm_MPIAIJ" 1868 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1869 { 1870 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1871 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1872 PetscErrorCode ierr; 1873 PetscInt i,j,cstart = mat->cmap->rstart; 1874 PetscReal sum = 0.0; 1875 MatScalar *v; 1876 1877 PetscFunctionBegin; 1878 if (aij->size == 1) { 1879 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1880 } else { 1881 if (type == NORM_FROBENIUS) { 1882 v = amat->a; 1883 for (i=0; i<amat->nz; i++) { 1884 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1885 } 1886 v = bmat->a; 1887 for (i=0; i<bmat->nz; i++) { 1888 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1889 } 1890 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1891 *norm = PetscSqrtReal(*norm); 1892 } else if (type == NORM_1) { /* max column norm */ 1893 PetscReal *tmp,*tmp2; 1894 PetscInt *jj,*garray = aij->garray; 1895 ierr = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr); 1896 ierr = 
PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr); 1897 *norm = 0.0; 1898 v = amat->a; jj = amat->j; 1899 for (j=0; j<amat->nz; j++) { 1900 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1901 } 1902 v = bmat->a; jj = bmat->j; 1903 for (j=0; j<bmat->nz; j++) { 1904 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1905 } 1906 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1907 for (j=0; j<mat->cmap->N; j++) { 1908 if (tmp2[j] > *norm) *norm = tmp2[j]; 1909 } 1910 ierr = PetscFree(tmp);CHKERRQ(ierr); 1911 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1912 } else if (type == NORM_INFINITY) { /* max row norm */ 1913 PetscReal ntemp = 0.0; 1914 for (j=0; j<aij->A->rmap->n; j++) { 1915 v = amat->a + amat->i[j]; 1916 sum = 0.0; 1917 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1918 sum += PetscAbsScalar(*v); v++; 1919 } 1920 v = bmat->a + bmat->i[j]; 1921 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1922 sum += PetscAbsScalar(*v); v++; 1923 } 1924 if (sum > ntemp) ntemp = sum; 1925 } 1926 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1927 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1928 } 1929 PetscFunctionReturn(0); 1930 } 1931 1932 #undef __FUNCT__ 1933 #define __FUNCT__ "MatTranspose_MPIAIJ" 1934 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1935 { 1936 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1937 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1938 PetscErrorCode ierr; 1939 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1940 PetscInt cstart = A->cmap->rstart,ncol; 1941 Mat B; 1942 MatScalar *array; 1943 1944 PetscFunctionBegin; 1945 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1946 1947 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1948 ai = Aloc->i; aj = Aloc->j; 1949 bi = Bloc->i; bj = Bloc->j; 1950 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1951 PetscInt *d_nnz,*g_nnz,*o_nnz; 1952 PetscSFNode *oloc; 1953 PETSC_UNUSED PetscSF sf; 1954 1955 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1956 /* compute d_nnz for preallocation */ 1957 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1958 for (i=0; i<ai[ma]; i++) { 1959 d_nnz[aj[i]]++; 1960 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1961 } 1962 /* compute local off-diagonal contributions */ 1963 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1964 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1965 /* map those to global */ 1966 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1967 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1968 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1969 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1970 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1971 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1972 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1973 1974 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1975 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1976 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1977 ierr = 
MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1978 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1979 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 1980 } else { 1981 B = *matout; 1982 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1983 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1984 } 1985 1986 /* copy over the A part */ 1987 array = Aloc->a; 1988 row = A->rmap->rstart; 1989 for (i=0; i<ma; i++) { 1990 ncol = ai[i+1]-ai[i]; 1991 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1992 row++; 1993 array += ncol; aj += ncol; 1994 } 1995 aj = Aloc->j; 1996 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1997 1998 /* copy over the B part */ 1999 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 2000 array = Bloc->a; 2001 row = A->rmap->rstart; 2002 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2003 cols_tmp = cols; 2004 for (i=0; i<mb; i++) { 2005 ncol = bi[i+1]-bi[i]; 2006 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2007 row++; 2008 array += ncol; cols_tmp += ncol; 2009 } 2010 ierr = PetscFree(cols);CHKERRQ(ierr); 2011 2012 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2013 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2014 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2015 *matout = B; 2016 } else { 2017 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2018 } 2019 PetscFunctionReturn(0); 2020 } 2021 2022 #undef __FUNCT__ 2023 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2024 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2025 { 2026 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2027 Mat a = aij->A,b = aij->B; 2028 PetscErrorCode ierr; 2029 PetscInt s1,s2,s3; 2030 2031 PetscFunctionBegin; 2032 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2033 if (rr) { 2034 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2035 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2036 /* Overlap communication with computation. 
*/ 2037 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2038 } 2039 if (ll) { 2040 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2041 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2042 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2043 } 2044 /* scale the diagonal block */ 2045 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2046 2047 if (rr) { 2048 /* Do a scatter end and then right scale the off-diagonal block */ 2049 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2050 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2051 } 2052 PetscFunctionReturn(0); 2053 } 2054 2055 #undef __FUNCT__ 2056 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2057 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2058 { 2059 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2060 PetscErrorCode ierr; 2061 2062 PetscFunctionBegin; 2063 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2064 PetscFunctionReturn(0); 2065 } 2066 2067 #undef __FUNCT__ 2068 #define __FUNCT__ "MatEqual_MPIAIJ" 2069 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2070 { 2071 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2072 Mat a,b,c,d; 2073 PetscBool flg; 2074 PetscErrorCode ierr; 2075 2076 PetscFunctionBegin; 2077 a = matA->A; b = matA->B; 2078 c = matB->A; d = matB->B; 2079 2080 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2081 if (flg) { 2082 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2083 } 2084 ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2085 PetscFunctionReturn(0); 2086 } 2087 2088 #undef __FUNCT__ 2089 #define __FUNCT__ "MatCopy_MPIAIJ" 2090 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2091 { 2092 PetscErrorCode ierr; 2093 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2094 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2095 2096 PetscFunctionBegin; 2097 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2098 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2099 /* because of the column compression in the off-processor part of the matrix a->B, 2100 the number of columns in a->B and b->B may be different, hence we cannot call 2101 the MatCopy() directly on the two parts. If need be, we can provide a more 2102 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2103 then copying the submatrices */ 2104 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2105 } else { 2106 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2107 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2108 } 2109 PetscFunctionReturn(0); 2110 } 2111 2112 #undef __FUNCT__ 2113 #define __FUNCT__ "MatSetUp_MPIAIJ" 2114 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2115 { 2116 PetscErrorCode ierr; 2117 2118 PetscFunctionBegin; 2119 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2120 PetscFunctionReturn(0); 2121 } 2122 2123 /* 2124 Computes the number of nonzeros per row needed for preallocation when X and Y 2125 have different nonzero structure. 
2126 */ 2127 #undef __FUNCT__ 2128 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private" 2129 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2130 { 2131 PetscInt i,j,k,nzx,nzy; 2132 2133 PetscFunctionBegin; 2134 /* Set the number of nonzeros in the new matrix */ 2135 for (i=0; i<m; i++) { 2136 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2137 nzx = xi[i+1] - xi[i]; 2138 nzy = yi[i+1] - yi[i]; 2139 nnz[i] = 0; 2140 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2141 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2142 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2143 nnz[i]++; 2144 } 2145 for (; k<nzy; k++) nnz[i]++; 2146 } 2147 PetscFunctionReturn(0); 2148 } 2149 2150 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2151 #undef __FUNCT__ 2152 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2153 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2154 { 2155 PetscErrorCode ierr; 2156 PetscInt m = Y->rmap->N; 2157 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2158 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2159 2160 PetscFunctionBegin; 2161 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2162 PetscFunctionReturn(0); 2163 } 2164 2165 #undef __FUNCT__ 2166 #define __FUNCT__ "MatAXPY_MPIAIJ" 2167 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2168 { 2169 PetscErrorCode ierr; 2170 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2171 PetscBLASInt bnz,one=1; 2172 Mat_SeqAIJ *x,*y; 2173 2174 PetscFunctionBegin; 2175 if (str == SAME_NONZERO_PATTERN) { 2176 PetscScalar alpha = a; 2177 x = (Mat_SeqAIJ*)xx->A->data; 2178 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2179 y = (Mat_SeqAIJ*)yy->A->data; 2180 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2181 x = (Mat_SeqAIJ*)xx->B->data; 2182 y = (Mat_SeqAIJ*)yy->B->data; 2183 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2184 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2185 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2186 } else if (str == SUBSET_NONZERO_PATTERN) { 2187 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2188 } else { 2189 Mat B; 2190 PetscInt *nnz_d,*nnz_o; 2191 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2192 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2193 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2194 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2195 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2196 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2197 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2198 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2199 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2200 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2201 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2202 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2203 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2204 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2205 } 2206 PetscFunctionReturn(0); 2207 } 2208 2209 extern 
PetscErrorCode MatConjugate_SeqAIJ(Mat); 2210 2211 #undef __FUNCT__ 2212 #define __FUNCT__ "MatConjugate_MPIAIJ" 2213 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2214 { 2215 #if defined(PETSC_USE_COMPLEX) 2216 PetscErrorCode ierr; 2217 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2218 2219 PetscFunctionBegin; 2220 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2221 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2222 #else 2223 PetscFunctionBegin; 2224 #endif 2225 PetscFunctionReturn(0); 2226 } 2227 2228 #undef __FUNCT__ 2229 #define __FUNCT__ "MatRealPart_MPIAIJ" 2230 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2231 { 2232 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2233 PetscErrorCode ierr; 2234 2235 PetscFunctionBegin; 2236 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2237 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2238 PetscFunctionReturn(0); 2239 } 2240 2241 #undef __FUNCT__ 2242 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2243 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2244 { 2245 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2246 PetscErrorCode ierr; 2247 2248 PetscFunctionBegin; 2249 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2250 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2251 PetscFunctionReturn(0); 2252 } 2253 2254 #if defined(PETSC_HAVE_PBGL) 2255 2256 #include <boost/parallel/mpi/bsp_process_group.hpp> 2257 #include <boost/graph/distributed/ilu_default_graph.hpp> 2258 #include <boost/graph/distributed/ilu_0_block.hpp> 2259 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2260 #include <boost/graph/distributed/petsc/interface.hpp> 2261 #include <boost/multi_array.hpp> 2262 #include <boost/parallel/distributed_property_map.hpp> 2263 2264 #undef __FUNCT__ 2265 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2266 /* 2267 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2268 */ 2269 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2270 { 2271 namespace petsc = boost::distributed::petsc; 2272 2273 namespace graph_dist = boost::graph::distributed; 2274 using boost::graph::distributed::ilu_default::process_group_type; 2275 using boost::graph::ilu_permuted; 2276 2277 PetscBool row_identity, col_identity; 2278 PetscContainer c; 2279 PetscInt m, n, M, N; 2280 PetscErrorCode ierr; 2281 2282 PetscFunctionBegin; 2283 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2284 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2285 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2286 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2287 2288 process_group_type pg; 2289 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2290 lgraph_type *lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2291 lgraph_type& level_graph = *lgraph_p; 2292 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2293 2294 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2295 ilu_permuted(level_graph); 2296 2297 /* put together the new matrix */ 2298 ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr); 2299 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2300 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2301 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2302 ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr); 2303 ierr = MatSetType(fact,
((PetscObject)A)->type_name);CHKERRQ(ierr); 2304 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2305 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2306 2307 ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c); 2308 ierr = PetscContainerSetPointer(c, lgraph_p); 2309 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2310 ierr = PetscContainerDestroy(&c); 2311 PetscFunctionReturn(0); 2312 } 2313 2314 #undef __FUNCT__ 2315 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2316 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2317 { 2318 PetscFunctionBegin; 2319 PetscFunctionReturn(0); 2320 } 2321 2322 #undef __FUNCT__ 2323 #define __FUNCT__ "MatSolve_MPIAIJ" 2324 /* 2325 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2326 */ 2327 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2328 { 2329 namespace graph_dist = boost::graph::distributed; 2330 2331 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2332 lgraph_type *lgraph_p; 2333 PetscContainer c; 2334 PetscErrorCode ierr; 2335 2336 PetscFunctionBegin; 2337 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr); 2338 ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr); 2339 ierr = VecCopy(b, x);CHKERRQ(ierr); 2340 2341 PetscScalar *array_x; 2342 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2343 PetscInt sx; 2344 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2345 2346 PetscScalar *array_b; 2347 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2348 PetscInt sb; 2349 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2350 2351 lgraph_type& level_graph = *lgraph_p; 2352 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2353 2354 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2355 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]); 2356 array_ref_type ref_x(array_x, boost::extents[num_vertices(graph)]); 2357 2358 typedef boost::iterator_property_map<array_ref_type::iterator, 2359 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2360 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)); 2361 gvector_type vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2362 2363 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2364 PetscFunctionReturn(0); 2365 } 2366 #endif 2367 2368 2369 #undef __FUNCT__ 2370 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced" 2371 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2372 { 2373 PetscMPIInt rank,size; 2374 MPI_Comm comm; 2375 PetscErrorCode ierr; 2376 PetscInt nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N; 2377 PetscMPIInt *send_rank= NULL,*recv_rank=NULL,subrank,subsize; 2378 PetscInt *rowrange = mat->rmap->range; 2379 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2380 Mat A = aij->A,B=aij->B,C=*matredundant; 2381 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2382 PetscScalar *sbuf_a; 2383 PetscInt nzlocal=a->nz+b->nz; 2384 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2385 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray; 2386 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2387 MatScalar *aworkA,*aworkB; 2388 PetscScalar *vals; 2389 PetscMPIInt tag1,tag2,tag3,imdex; 2390 MPI_Request *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL; 2391 
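   /*
      Added explanatory note (not in the original source): the exchange below proceeds in three
      rounds, each on its own tag obtained from PetscObjectGetNewTag():
        tag1 - each process sends its local nonzero count nzlocal to the ranks listed in send_rank[]
        tag2 - it then sends sbuf_j, the packed row pointers followed by the global column indices
        tag3 - it finally sends the matching numerical values in sbuf_a
      Receives are posted first and completed with MPI_Waitany(), so the arrival order of messages
      from different ranks does not matter.
   */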
MPI_Request *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL; 2392 MPI_Status recv_status,*send_status; 2393 PetscInt *sbuf_nz=NULL,*rbuf_nz=NULL,count; 2394 PetscInt **rbuf_j=NULL; 2395 PetscScalar **rbuf_a=NULL; 2396 Mat_Redundant *redund =NULL; 2397 2398 PetscFunctionBegin; 2399 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2400 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2401 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2402 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2403 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2404 2405 if (reuse == MAT_REUSE_MATRIX) { 2406 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2407 if (subsize == 1) { 2408 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2409 redund = c->redundant; 2410 } else { 2411 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2412 redund = c->redundant; 2413 } 2414 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2415 2416 nsends = redund->nsends; 2417 nrecvs = redund->nrecvs; 2418 send_rank = redund->send_rank; 2419 recv_rank = redund->recv_rank; 2420 sbuf_nz = redund->sbuf_nz; 2421 rbuf_nz = redund->rbuf_nz; 2422 sbuf_j = redund->sbuf_j; 2423 sbuf_a = redund->sbuf_a; 2424 rbuf_j = redund->rbuf_j; 2425 rbuf_a = redund->rbuf_a; 2426 } 2427 2428 if (reuse == MAT_INITIAL_MATRIX) { 2429 PetscInt nleftover,np_subcomm; 2430 2431 /* get the destination processors' id send_rank, nsends and nrecvs */ 2432 ierr = PetscMalloc2(size,&send_rank,size,&recv_rank);CHKERRQ(ierr); 2433 2434 np_subcomm = size/nsubcomm; 2435 nleftover = size - nsubcomm*np_subcomm; 2436 2437 /* block of codes below is specific for INTERLACED */ 2438 /* ------------------------------------------------*/ 2439 nsends = 0; nrecvs = 0; 2440 for (i=0; i<size; i++) { 2441 if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */ 2442 send_rank[nsends++] = i; 2443 recv_rank[nrecvs++] = i; 2444 } 2445 } 2446 if (rank >= size - nleftover) { /* this proc is a leftover processor */ 2447 i = size-nleftover-1; 2448 j = 0; 2449 while (j < nsubcomm - nleftover) { 2450 send_rank[nsends++] = i; 2451 i--; j++; 2452 } 2453 } 2454 2455 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */ 2456 for (i=0; i<nleftover; i++) { 2457 recv_rank[nrecvs++] = size-nleftover+i; 2458 } 2459 } 2460 /*----------------------------------------------*/ 2461 2462 /* allocate sbuf_j, sbuf_a */ 2463 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2464 ierr = PetscMalloc1(i,&sbuf_j);CHKERRQ(ierr); 2465 ierr = PetscMalloc1((nzlocal+1),&sbuf_a);CHKERRQ(ierr); 2466 /* 2467 ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr); 2468 ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr); 2469 */ 2470 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2471 2472 /* copy mat's local entries into the buffers */ 2473 if (reuse == MAT_INITIAL_MATRIX) { 2474 rownz_max = 0; 2475 rptr = sbuf_j; 2476 cols = sbuf_j + rend-rstart + 1; 2477 vals = sbuf_a; 2478 rptr[0] = 0; 2479 for (i=0; i<rend-rstart; i++) { 2480 row = i + rstart; 2481 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2482 ncols = nzA + nzB; 2483 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2484 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2485 /* load the column indices for this row into cols */ 2486 lwrite = 0; 2487 for (l=0; l<nzB; l++) { 2488 
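        /* added note (not in the original source): this loop and the two that follow write the row
           out in ascending global column order -- first the off-diagonal (B) entries whose global
           column, recovered through bmap (aij->garray), lies left of the diagonal block, then the
           diagonal (A) block shifted by cstart, then the remaining B entries at or beyond cend */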
if ((ctmp = bmap[cworkB[l]]) < cstart) { 2489 vals[lwrite] = aworkB[l]; 2490 cols[lwrite++] = ctmp; 2491 } 2492 } 2493 for (l=0; l<nzA; l++) { 2494 vals[lwrite] = aworkA[l]; 2495 cols[lwrite++] = cstart + cworkA[l]; 2496 } 2497 for (l=0; l<nzB; l++) { 2498 if ((ctmp = bmap[cworkB[l]]) >= cend) { 2499 vals[lwrite] = aworkB[l]; 2500 cols[lwrite++] = ctmp; 2501 } 2502 } 2503 vals += ncols; 2504 cols += ncols; 2505 rptr[i+1] = rptr[i] + ncols; 2506 if (rownz_max < ncols) rownz_max = ncols; 2507 } 2508 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2509 } else { /* only copy matrix values into sbuf_a */ 2510 rptr = sbuf_j; 2511 vals = sbuf_a; 2512 rptr[0] = 0; 2513 for (i=0; i<rend-rstart; i++) { 2514 row = i + rstart; 2515 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2516 ncols = nzA + nzB; 2517 cworkB = b->j + b->i[i]; 2518 aworkA = a->a + a->i[i]; 2519 aworkB = b->a + b->i[i]; 2520 lwrite = 0; 2521 for (l=0; l<nzB; l++) { 2522 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2523 } 2524 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2525 for (l=0; l<nzB; l++) { 2526 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2527 } 2528 vals += ncols; 2529 rptr[i+1] = rptr[i] + ncols; 2530 } 2531 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2532 2533 /* send nzlocal to others, and recv other's nzlocal */ 2534 /*--------------------------------------------------*/ 2535 if (reuse == MAT_INITIAL_MATRIX) { 2536 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr); 2537 2538 s_waits2 = s_waits3 + nsends; 2539 s_waits1 = s_waits2 + nsends; 2540 r_waits1 = s_waits1 + nsends; 2541 r_waits2 = r_waits1 + nrecvs; 2542 r_waits3 = r_waits2 + nrecvs; 2543 } else { 2544 ierr = PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr); 2545 2546 r_waits3 = s_waits3 + nsends; 2547 } 2548 2549 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2550 if (reuse == MAT_INITIAL_MATRIX) { 2551 /* get new tags to keep the communication clean */ 2552 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2553 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2554 ierr = PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);CHKERRQ(ierr); 2555 2556 /* post receives of other's nzlocal */ 2557 for (i=0; i<nrecvs; i++) { 2558 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2559 } 2560 /* send nzlocal to others */ 2561 for (i=0; i<nsends; i++) { 2562 sbuf_nz[i] = nzlocal; 2563 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2564 } 2565 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2566 count = nrecvs; 2567 while (count) { 2568 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2569 2570 recv_rank[imdex] = recv_status.MPI_SOURCE; 2571 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2572 ierr = PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);CHKERRQ(ierr); 2573 2574 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2575 2576 rbuf_nz[imdex] += i + 2; 2577 2578 ierr = PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);CHKERRQ(ierr); 2579 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2580 count--; 2581 } 
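    /*
       Added note on the buffer layout (not in the original source): sbuf_j, and each received
       rbuf_j[], stores the row pointer array (nrows+1 entries) immediately followed by the global
       column indices of those rows; sbuf_a/rbuf_a[] carry the values in the same order, which is
       what allows the received blocks to be inserted row by row with MatSetValues() further below.
    */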
2582 /* wait on sends of nzlocal */ 2583 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2584 /* send mat->i,j to others, and recv from other's */ 2585 /*------------------------------------------------*/ 2586 for (i=0; i<nsends; i++) { 2587 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2588 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2589 } 2590 /* wait on receives of mat->i,j */ 2591 /*------------------------------*/ 2592 count = nrecvs; 2593 while (count) { 2594 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2595 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2596 count--; 2597 } 2598 /* wait on sends of mat->i,j */ 2599 /*---------------------------*/ 2600 if (nsends) { 2601 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2602 } 2603 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2604 2605 /* post receives, send and receive mat->a */ 2606 /*----------------------------------------*/ 2607 for (imdex=0; imdex<nrecvs; imdex++) { 2608 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2609 } 2610 for (i=0; i<nsends; i++) { 2611 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2612 } 2613 count = nrecvs; 2614 while (count) { 2615 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2616 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2617 count--; 2618 } 2619 if (nsends) { 2620 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2621 } 2622 2623 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2624 2625 /* create redundant matrix */ 2626 /*-------------------------*/ 2627 if (reuse == MAT_INITIAL_MATRIX) { 2628 const PetscInt *range; 2629 PetscInt rstart_sub,rend_sub,mloc_sub; 2630 2631 /* compute rownz_max for preallocation */ 2632 for (imdex=0; imdex<nrecvs; imdex++) { 2633 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2634 rptr = rbuf_j[imdex]; 2635 for (i=0; i<j; i++) { 2636 ncols = rptr[i+1] - rptr[i]; 2637 if (rownz_max < ncols) rownz_max = ncols; 2638 } 2639 } 2640 2641 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2642 2643 /* get local size of redundant matrix 2644 - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED, works for other types, but may not efficient! 
*/ 2645 ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr); 2646 rstart_sub = range[nsubcomm*subrank]; 2647 if (subrank+1 < subsize) { /* not the last proc in subcomm */ 2648 rend_sub = range[nsubcomm*(subrank+1)]; 2649 } else { 2650 rend_sub = mat->rmap->N; 2651 } 2652 mloc_sub = rend_sub - rstart_sub; 2653 2654 if (M == N) { 2655 ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2656 } else { /* non-square matrix */ 2657 ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr); 2658 } 2659 ierr = MatSetBlockSizesFromMats(C,mat,mat);CHKERRQ(ierr); 2660 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2661 ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr); 2662 ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr); 2663 } else { 2664 C = *matredundant; 2665 } 2666 2667 /* insert local matrix entries */ 2668 rptr = sbuf_j; 2669 cols = sbuf_j + rend-rstart + 1; 2670 vals = sbuf_a; 2671 for (i=0; i<rend-rstart; i++) { 2672 row = i + rstart; 2673 ncols = rptr[i+1] - rptr[i]; 2674 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2675 vals += ncols; 2676 cols += ncols; 2677 } 2678 /* insert received matrix entries */ 2679 for (imdex=0; imdex<nrecvs; imdex++) { 2680 rstart = rowrange[recv_rank[imdex]]; 2681 rend = rowrange[recv_rank[imdex]+1]; 2682 /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */ 2683 rptr = rbuf_j[imdex]; 2684 cols = rbuf_j[imdex] + rend-rstart + 1; 2685 vals = rbuf_a[imdex]; 2686 for (i=0; i<rend-rstart; i++) { 2687 row = i + rstart; 2688 ncols = rptr[i+1] - rptr[i]; 2689 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2690 vals += ncols; 2691 cols += ncols; 2692 } 2693 } 2694 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2695 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2696 2697 if (reuse == MAT_INITIAL_MATRIX) { 2698 *matredundant = C; 2699 2700 /* create a supporting struct and attach it to C for reuse */ 2701 ierr = PetscNewLog(C,&redund);CHKERRQ(ierr); 2702 if (subsize == 1) { 2703 Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data; 2704 c->redundant = redund; 2705 } else { 2706 Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data; 2707 c->redundant = redund; 2708 } 2709 2710 redund->nzlocal = nzlocal; 2711 redund->nsends = nsends; 2712 redund->nrecvs = nrecvs; 2713 redund->send_rank = send_rank; 2714 redund->recv_rank = recv_rank; 2715 redund->sbuf_nz = sbuf_nz; 2716 redund->rbuf_nz = rbuf_nz; 2717 redund->sbuf_j = sbuf_j; 2718 redund->sbuf_a = sbuf_a; 2719 redund->rbuf_j = rbuf_j; 2720 redund->rbuf_a = rbuf_a; 2721 redund->psubcomm = NULL; 2722 } 2723 PetscFunctionReturn(0); 2724 } 2725 2726 #undef __FUNCT__ 2727 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2728 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant) 2729 { 2730 PetscErrorCode ierr; 2731 MPI_Comm comm; 2732 PetscMPIInt size,subsize; 2733 PetscInt mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N; 2734 Mat_Redundant *redund=NULL; 2735 PetscSubcomm psubcomm=NULL; 2736 MPI_Comm subcomm_in=subcomm; 2737 Mat *matseq; 2738 IS isrow,iscol; 2739 2740 PetscFunctionBegin; 2741 if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */ 2742 if (reuse == MAT_INITIAL_MATRIX) { 2743 /* create psubcomm, then get subcomm */ 2744 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 2745 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2746 if (nsubcomm < 1 || nsubcomm 
> size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must be between 1 and %D",size); 2747 2748 ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr); 2749 ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr); 2750 ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr); 2751 ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr); 2752 subcomm = psubcomm->comm; 2753 } else { /* retrieve psubcomm and subcomm */ 2754 ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr); 2755 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2756 if (subsize == 1) { 2757 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2758 redund = c->redundant; 2759 } else { 2760 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2761 redund = c->redundant; 2762 } 2763 psubcomm = redund->psubcomm; 2764 } 2765 if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) { 2766 ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr); 2767 if (reuse == MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_Redundant() */ 2768 ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr); 2769 if (subsize == 1) { 2770 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2771 c->redundant->psubcomm = psubcomm; 2772 } else { 2773 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2774 c->redundant->psubcomm = psubcomm; 2775 } 2776 } 2777 PetscFunctionReturn(0); 2778 } 2779 } 2780 2781 /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */ 2782 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2783 if (reuse == MAT_INITIAL_MATRIX) { 2784 /* create a local sequential matrix matseq[0] */ 2785 mloc_sub = PETSC_DECIDE; 2786 ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr); 2787 ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr); 2788 rstart = rend - mloc_sub; 2789 ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr); 2790 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr); 2791 } else { /* reuse == MAT_REUSE_MATRIX */ 2792 if (subsize == 1) { 2793 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2794 redund = c->redundant; 2795 } else { 2796 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2797 redund = c->redundant; 2798 } 2799 2800 isrow = redund->isrow; 2801 iscol = redund->iscol; 2802 matseq = redund->matseq; 2803 } 2804 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr); 2805 ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr); 2806 2807 if (reuse == MAT_INITIAL_MATRIX) { 2808 /* create a supporting struct and attach it to C for reuse */ 2809 ierr = PetscNewLog(*matredundant,&redund);CHKERRQ(ierr); 2810 if (subsize == 1) { 2811 Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data; 2812 c->redundant = redund; 2813 } else { 2814 Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data; 2815 c->redundant = redund; 2816 } 2817 redund->isrow = isrow; 2818 redund->iscol = iscol; 2819 redund->matseq = matseq; 2820 redund->psubcomm = psubcomm; 2821 } 2822 PetscFunctionReturn(0); 2823 } 2824 2825 #undef __FUNCT__ 2826 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2827 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2828 { 2829 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2830 PetscErrorCode ierr; 2831 PetscInt i,*idxb = 0; 2832 PetscScalar *va,*vb; 2833 Vec vtmp; 2834 2835
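/*
   Added description (not part of the original source): the row maximum of |a_ij| is taken over the
   diagonal block a->A first (column indices shifted by A->cmap->rstart), then compared entrywise
   with the off-diagonal block a->B, whose local column indices are mapped back to global ones
   through a->garray. A minimal calling sketch, assuming an assembled MATMPIAIJ matrix A with local
   row count m (rmax and loc are hypothetical names):

       Vec      rmax;
       PetscInt *loc;
       ierr = MatGetVecs(A,NULL,&rmax);CHKERRQ(ierr);
       ierr = PetscMalloc1(m,&loc);CHKERRQ(ierr);
       ierr = MatGetRowMaxAbs(A,rmax,loc);CHKERRQ(ierr);

   The public MatGetRowMaxAbs() dispatches to this routine for MATMPIAIJ matrices.
*/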
PetscFunctionBegin; 2836 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2837 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2838 if (idx) { 2839 for (i=0; i<A->rmap->n; i++) { 2840 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2841 } 2842 } 2843 2844 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2845 if (idx) { 2846 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2847 } 2848 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2849 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2850 2851 for (i=0; i<A->rmap->n; i++) { 2852 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2853 va[i] = vb[i]; 2854 if (idx) idx[i] = a->garray[idxb[i]]; 2855 } 2856 } 2857 2858 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2859 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2860 ierr = PetscFree(idxb);CHKERRQ(ierr); 2861 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2862 PetscFunctionReturn(0); 2863 } 2864 2865 #undef __FUNCT__ 2866 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2867 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2868 { 2869 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2870 PetscErrorCode ierr; 2871 PetscInt i,*idxb = 0; 2872 PetscScalar *va,*vb; 2873 Vec vtmp; 2874 2875 PetscFunctionBegin; 2876 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2877 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2878 if (idx) { 2879 for (i=0; i<A->rmap->n; i++) { 2880 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2881 } 2882 } 2883 2884 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2885 if (idx) { 2886 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2887 } 2888 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2889 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2890 2891 for (i=0; i<A->rmap->n; i++) { 2892 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2893 va[i] = vb[i]; 2894 if (idx) idx[i] = a->garray[idxb[i]]; 2895 } 2896 } 2897 2898 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2899 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2900 ierr = PetscFree(idxb);CHKERRQ(ierr); 2901 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2902 PetscFunctionReturn(0); 2903 } 2904 2905 #undef __FUNCT__ 2906 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2907 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2908 { 2909 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2910 PetscInt n = A->rmap->n; 2911 PetscInt cstart = A->cmap->rstart; 2912 PetscInt *cmap = mat->garray; 2913 PetscInt *diagIdx, *offdiagIdx; 2914 Vec diagV, offdiagV; 2915 PetscScalar *a, *diagA, *offdiagA; 2916 PetscInt r; 2917 PetscErrorCode ierr; 2918 2919 PetscFunctionBegin; 2920 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2921 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2922 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2923 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2924 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2925 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2926 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2927 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2928 for (r = 0; r < n; ++r) { 2929 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2930 a[r] = diagA[r]; 2931 idx[r] = cstart + diagIdx[r]; 2932 } else { 2933 a[r] = offdiagA[r]; 2934 idx[r] = cmap[offdiagIdx[r]]; 2935 } 2936 } 2937 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2938 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2939 ierr = VecRestoreArray(offdiagV,
&offdiagA);CHKERRQ(ierr); 2940 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2941 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2942 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2943 PetscFunctionReturn(0); 2944 } 2945 2946 #undef __FUNCT__ 2947 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2948 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2949 { 2950 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2951 PetscInt n = A->rmap->n; 2952 PetscInt cstart = A->cmap->rstart; 2953 PetscInt *cmap = mat->garray; 2954 PetscInt *diagIdx, *offdiagIdx; 2955 Vec diagV, offdiagV; 2956 PetscScalar *a, *diagA, *offdiagA; 2957 PetscInt r; 2958 PetscErrorCode ierr; 2959 2960 PetscFunctionBegin; 2961 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2962 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2963 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2964 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2965 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2966 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2967 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2968 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2969 for (r = 0; r < n; ++r) { 2970 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2971 a[r] = diagA[r]; 2972 idx[r] = cstart + diagIdx[r]; 2973 } else { 2974 a[r] = offdiagA[r]; 2975 idx[r] = cmap[offdiagIdx[r]]; 2976 } 2977 } 2978 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2979 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2980 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2981 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2982 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2983 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2984 PetscFunctionReturn(0); 2985 } 2986 2987 #undef __FUNCT__ 2988 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 2989 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2990 { 2991 PetscErrorCode ierr; 2992 Mat *dummy; 2993 2994 PetscFunctionBegin; 2995 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2996 *newmat = *dummy; 2997 ierr = PetscFree(dummy);CHKERRQ(ierr); 2998 PetscFunctionReturn(0); 2999 } 3000 3001 #undef __FUNCT__ 3002 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3003 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3004 { 3005 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3006 PetscErrorCode ierr; 3007 3008 PetscFunctionBegin; 3009 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3010 PetscFunctionReturn(0); 3011 } 3012 3013 #undef __FUNCT__ 3014 #define __FUNCT__ "MatSetRandom_MPIAIJ" 3015 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 3016 { 3017 PetscErrorCode ierr; 3018 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 3019 3020 PetscFunctionBegin; 3021 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 3022 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 3023 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3024 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3025 PetscFunctionReturn(0); 3026 } 3027 3028 /* -------------------------------------------------------------------*/ 3029 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3030 MatGetRow_MPIAIJ, 3031 MatRestoreRow_MPIAIJ, 3032 MatMult_MPIAIJ, 3033 /* 4*/ MatMultAdd_MPIAIJ, 3034 MatMultTranspose_MPIAIJ, 3035 MatMultTransposeAdd_MPIAIJ, 3036 #if defined(PETSC_HAVE_PBGL) 3037 MatSolve_MPIAIJ, 3038 #else 3039 0, 3040 #endif 3041 0, 3042 
0, 3043 /*10*/ 0, 3044 0, 3045 0, 3046 MatSOR_MPIAIJ, 3047 MatTranspose_MPIAIJ, 3048 /*15*/ MatGetInfo_MPIAIJ, 3049 MatEqual_MPIAIJ, 3050 MatGetDiagonal_MPIAIJ, 3051 MatDiagonalScale_MPIAIJ, 3052 MatNorm_MPIAIJ, 3053 /*20*/ MatAssemblyBegin_MPIAIJ, 3054 MatAssemblyEnd_MPIAIJ, 3055 MatSetOption_MPIAIJ, 3056 MatZeroEntries_MPIAIJ, 3057 /*24*/ MatZeroRows_MPIAIJ, 3058 0, 3059 #if defined(PETSC_HAVE_PBGL) 3060 0, 3061 #else 3062 0, 3063 #endif 3064 0, 3065 0, 3066 /*29*/ MatSetUp_MPIAIJ, 3067 #if defined(PETSC_HAVE_PBGL) 3068 0, 3069 #else 3070 0, 3071 #endif 3072 0, 3073 0, 3074 0, 3075 /*34*/ MatDuplicate_MPIAIJ, 3076 0, 3077 0, 3078 0, 3079 0, 3080 /*39*/ MatAXPY_MPIAIJ, 3081 MatGetSubMatrices_MPIAIJ, 3082 MatIncreaseOverlap_MPIAIJ, 3083 MatGetValues_MPIAIJ, 3084 MatCopy_MPIAIJ, 3085 /*44*/ MatGetRowMax_MPIAIJ, 3086 MatScale_MPIAIJ, 3087 0, 3088 0, 3089 MatZeroRowsColumns_MPIAIJ, 3090 /*49*/ MatSetRandom_MPIAIJ, 3091 0, 3092 0, 3093 0, 3094 0, 3095 /*54*/ MatFDColoringCreate_MPIXAIJ, 3096 0, 3097 MatSetUnfactored_MPIAIJ, 3098 MatPermute_MPIAIJ, 3099 0, 3100 /*59*/ MatGetSubMatrix_MPIAIJ, 3101 MatDestroy_MPIAIJ, 3102 MatView_MPIAIJ, 3103 0, 3104 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 3105 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 3106 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 3107 0, 3108 0, 3109 0, 3110 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3111 MatGetRowMinAbs_MPIAIJ, 3112 0, 3113 MatSetColoring_MPIAIJ, 3114 0, 3115 MatSetValuesAdifor_MPIAIJ, 3116 /*75*/ MatFDColoringApply_AIJ, 3117 0, 3118 0, 3119 0, 3120 MatFindZeroDiagonals_MPIAIJ, 3121 /*80*/ 0, 3122 0, 3123 0, 3124 /*83*/ MatLoad_MPIAIJ, 3125 0, 3126 0, 3127 0, 3128 0, 3129 0, 3130 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3131 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3132 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3133 MatPtAP_MPIAIJ_MPIAIJ, 3134 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3135 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 3136 0, 3137 0, 3138 0, 3139 0, 3140 /*99*/ 0, 3141 0, 3142 0, 3143 MatConjugate_MPIAIJ, 3144 0, 3145 /*104*/MatSetValuesRow_MPIAIJ, 3146 MatRealPart_MPIAIJ, 3147 MatImaginaryPart_MPIAIJ, 3148 0, 3149 0, 3150 /*109*/0, 3151 MatGetRedundantMatrix_MPIAIJ, 3152 MatGetRowMin_MPIAIJ, 3153 0, 3154 0, 3155 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3156 0, 3157 0, 3158 0, 3159 0, 3160 /*119*/0, 3161 0, 3162 0, 3163 0, 3164 MatGetMultiProcBlock_MPIAIJ, 3165 /*124*/MatFindNonzeroRows_MPIAIJ, 3166 MatGetColumnNorms_MPIAIJ, 3167 MatInvertBlockDiagonal_MPIAIJ, 3168 0, 3169 MatGetSubMatricesParallel_MPIAIJ, 3170 /*129*/0, 3171 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3172 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3173 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3174 0, 3175 /*134*/0, 3176 0, 3177 0, 3178 0, 3179 0, 3180 /*139*/0, 3181 0, 3182 0, 3183 MatFDColoringSetUp_MPIXAIJ 3184 }; 3185 3186 /* ----------------------------------------------------------------------------------------*/ 3187 3188 #undef __FUNCT__ 3189 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3190 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3191 { 3192 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3193 PetscErrorCode ierr; 3194 3195 PetscFunctionBegin; 3196 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3197 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3198 PetscFunctionReturn(0); 3199 } 3200 3201 #undef __FUNCT__ 3202 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3203 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3204 { 3205 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 3206 PetscErrorCode ierr; 3207 3208 PetscFunctionBegin; 3209 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3210 ierr = 
MatRetrieveValues(aij->B);CHKERRQ(ierr); 3211 PetscFunctionReturn(0); 3212 } 3213 3214 #undef __FUNCT__ 3215 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3216 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3217 { 3218 Mat_MPIAIJ *b; 3219 PetscErrorCode ierr; 3220 3221 PetscFunctionBegin; 3222 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3223 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3224 b = (Mat_MPIAIJ*)B->data; 3225 3226 if (!B->preallocated) { 3227 /* Explicitly create 2 MATSEQAIJ matrices. */ 3228 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3229 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3230 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 3231 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3232 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 3233 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3234 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3235 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 3236 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3237 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 3238 } 3239 3240 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3241 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3242 B->preallocated = PETSC_TRUE; 3243 PetscFunctionReturn(0); 3244 } 3245 3246 #undef __FUNCT__ 3247 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3248 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3249 { 3250 Mat mat; 3251 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3252 PetscErrorCode ierr; 3253 3254 PetscFunctionBegin; 3255 *newmat = 0; 3256 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3257 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3258 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 3259 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3260 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3261 a = (Mat_MPIAIJ*)mat->data; 3262 3263 mat->factortype = matin->factortype; 3264 mat->assembled = PETSC_TRUE; 3265 mat->insertmode = NOT_SET_VALUES; 3266 mat->preallocated = PETSC_TRUE; 3267 3268 a->size = oldmat->size; 3269 a->rank = oldmat->rank; 3270 a->donotstash = oldmat->donotstash; 3271 a->roworiented = oldmat->roworiented; 3272 a->rowindices = 0; 3273 a->rowvalues = 0; 3274 a->getrowactive = PETSC_FALSE; 3275 3276 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3277 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3278 3279 if (oldmat->colmap) { 3280 #if defined(PETSC_USE_CTABLE) 3281 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3282 #else 3283 ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr); 3284 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3285 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3286 #endif 3287 } else a->colmap = 0; 3288 if (oldmat->garray) { 3289 PetscInt len; 3290 len = oldmat->B->cmap->n; 3291 ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr); 3292 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3293 if (len) { ierr = 
PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3294 } else a->garray = 0; 3295 3296 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3297 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 3298 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3299 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 3300 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3301 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 3302 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3303 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3304 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3305 *newmat = mat; 3306 PetscFunctionReturn(0); 3307 } 3308 3309 3310 3311 #undef __FUNCT__ 3312 #define __FUNCT__ "MatLoad_MPIAIJ" 3313 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3314 { 3315 PetscScalar *vals,*svals; 3316 MPI_Comm comm; 3317 PetscErrorCode ierr; 3318 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3319 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3320 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3321 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 3322 PetscInt cend,cstart,n,*rowners,sizesset=1; 3323 int fd; 3324 PetscInt bs = 1; 3325 3326 PetscFunctionBegin; 3327 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3328 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3329 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3330 if (!rank) { 3331 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3332 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 3333 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3334 } 3335 3336 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3337 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3338 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3339 3340 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3341 3342 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3343 M = header[1]; N = header[2]; 3344 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3345 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3346 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3347 3348 /* If global sizes are set, check if they are consistent with that given in the file */ 3349 if (sizesset) { 3350 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3351 } 3352 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3353 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3354 3355 /* determine ownership of all (block) rows */ 3356 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3357 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3358 else m = newMat->rmap->n; /* Set by user */ 3359 3360 
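/*
    A worked example of the PETSC_DECIDE branch above (hypothetical sizes): with M = 10 global
    rows, bs = 1 and size = 3 processes, M/bs = 10, 10/3 = 3 and 10%3 = 1, so rank 0 gets
    m = 4 local rows and ranks 1 and 2 get m = 3 each.
*/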
ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr); 3361 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3362 3363 /* First process needs enough room for process with most rows */ 3364 if (!rank) { 3365 mmax = rowners[1]; 3366 for (i=2; i<=size; i++) { 3367 mmax = PetscMax(mmax, rowners[i]); 3368 } 3369 } else mmax = -1; /* unused, but compilers complain */ 3370 3371 rowners[0] = 0; 3372 for (i=2; i<=size; i++) { 3373 rowners[i] += rowners[i-1]; 3374 } 3375 rstart = rowners[rank]; 3376 rend = rowners[rank+1]; 3377 3378 /* distribute row lengths to all processors */ 3379 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 3380 if (!rank) { 3381 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3382 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 3383 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 3384 for (j=0; j<m; j++) { 3385 procsnz[0] += ourlens[j]; 3386 } 3387 for (i=1; i<size; i++) { 3388 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3389 /* calculate the number of nonzeros on each processor */ 3390 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3391 procsnz[i] += rowlengths[j]; 3392 } 3393 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3394 } 3395 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3396 } else { 3397 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3398 } 3399 3400 if (!rank) { 3401 /* determine max buffer needed and allocate it */ 3402 maxnz = 0; 3403 for (i=0; i<size; i++) { 3404 maxnz = PetscMax(maxnz,procsnz[i]); 3405 } 3406 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 3407 3408 /* read in my part of the matrix column indices */ 3409 nz = procsnz[0]; 3410 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3411 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3412 3413 /* read in every one elses and ship off */ 3414 for (i=1; i<size; i++) { 3415 nz = procsnz[i]; 3416 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3417 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3418 } 3419 ierr = PetscFree(cols);CHKERRQ(ierr); 3420 } else { 3421 /* determine buffer space needed for message */ 3422 nz = 0; 3423 for (i=0; i<m; i++) { 3424 nz += ourlens[i]; 3425 } 3426 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3427 3428 /* receive message of column indices*/ 3429 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3430 } 3431 3432 /* determine column ownership if matrix is not square */ 3433 if (N != M) { 3434 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3435 else n = newMat->cmap->n; 3436 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3437 cstart = cend - n; 3438 } else { 3439 cstart = rstart; 3440 cend = rend; 3441 n = cend - cstart; 3442 } 3443 3444 /* loop over local rows, determining number of off diagonal entries */ 3445 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3446 jj = 0; 3447 for (i=0; i<m; i++) { 3448 for (j=0; j<ourlens[i]; j++) { 3449 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3450 jj++; 3451 } 3452 } 3453 3454 for (i=0; i<m; i++) { 3455 ourlens[i] -= offlens[i]; 3456 } 3457 if (!sizesset) { 3458 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3459 } 3460 3461 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3462 3463 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3464 3465 for (i=0; i<m; i++) { 3466 ourlens[i] += offlens[i]; 3467 } 3468 3469 if 
(!rank) { 3470 ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr); 3471 3472 /* read in my part of the matrix numerical values */ 3473 nz = procsnz[0]; 3474 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3475 3476 /* insert into matrix */ 3477 jj = rstart; 3478 smycols = mycols; 3479 svals = vals; 3480 for (i=0; i<m; i++) { 3481 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3482 smycols += ourlens[i]; 3483 svals += ourlens[i]; 3484 jj++; 3485 } 3486 3487 /* read in other processors and ship out */ 3488 for (i=1; i<size; i++) { 3489 nz = procsnz[i]; 3490 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3491 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3492 } 3493 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3494 } else { 3495 /* receive numeric values */ 3496 ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr); 3497 3498 /* receive message of values*/ 3499 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3500 3501 /* insert into matrix */ 3502 jj = rstart; 3503 smycols = mycols; 3504 svals = vals; 3505 for (i=0; i<m; i++) { 3506 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3507 smycols += ourlens[i]; 3508 svals += ourlens[i]; 3509 jj++; 3510 } 3511 } 3512 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3513 ierr = PetscFree(vals);CHKERRQ(ierr); 3514 ierr = PetscFree(mycols);CHKERRQ(ierr); 3515 ierr = PetscFree(rowners);CHKERRQ(ierr); 3516 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3517 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3518 PetscFunctionReturn(0); 3519 } 3520 3521 #undef __FUNCT__ 3522 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3523 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3524 { 3525 PetscErrorCode ierr; 3526 IS iscol_local; 3527 PetscInt csize; 3528 3529 PetscFunctionBegin; 3530 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3531 if (call == MAT_REUSE_MATRIX) { 3532 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3533 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3534 } else { 3535 PetscInt cbs; 3536 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3537 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3538 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3539 } 3540 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3541 if (call == MAT_INITIAL_MATRIX) { 3542 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3543 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3544 } 3545 PetscFunctionReturn(0); 3546 } 3547 3548 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3549 #undef __FUNCT__ 3550 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3551 /* 3552 Not great since it makes two copies of the submatrix, first an SeqAIJ 3553 in local and then by concatenating the local matrices the end result. 3554 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3555 3556 Note: This requires a sequential iscol with all indices. 
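    In this file such an iscol is produced by the caller MatGetSubMatrix_MPIAIJ() above, which
    uses ISAllGather() to collect the requested column indices onto every process and caches the
    result on the new matrix under the name "ISAllGather" for reuse.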
3557 */ 3558 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3559 { 3560 PetscErrorCode ierr; 3561 PetscMPIInt rank,size; 3562 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3563 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3564 PetscBool allcolumns, colflag; 3565 Mat M,Mreuse; 3566 MatScalar *vwork,*aa; 3567 MPI_Comm comm; 3568 Mat_SeqAIJ *aij; 3569 3570 PetscFunctionBegin; 3571 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3572 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3573 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3574 3575 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3576 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3577 if (colflag && ncol == mat->cmap->N) { 3578 allcolumns = PETSC_TRUE; 3579 } else { 3580 allcolumns = PETSC_FALSE; 3581 } 3582 if (call == MAT_REUSE_MATRIX) { 3583 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3584 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3585 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3586 } else { 3587 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3588 } 3589 3590 /* 3591 m - number of local rows 3592 n - number of columns (same on all processors) 3593 rstart - first row in new global matrix generated 3594 */ 3595 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3596 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3597 if (call == MAT_INITIAL_MATRIX) { 3598 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3599 ii = aij->i; 3600 jj = aij->j; 3601 3602 /* 3603 Determine the number of non-zeros in the diagonal and off-diagonal 3604 portions of the matrix in order to do correct preallocation 3605 */ 3606 3607 /* first get start and end of "diagonal" columns */ 3608 if (csize == PETSC_DECIDE) { 3609 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3610 if (mglobal == n) { /* square matrix */ 3611 nlocal = m; 3612 } else { 3613 nlocal = n/size + ((n % size) > rank); 3614 } 3615 } else { 3616 nlocal = csize; 3617 } 3618 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3619 rstart = rend - nlocal; 3620 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3621 3622 /* next, compute all the lengths */ 3623 ierr = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr); 3624 olens = dlens + m; 3625 for (i=0; i<m; i++) { 3626 jend = ii[i+1] - ii[i]; 3627 olen = 0; 3628 dlen = 0; 3629 for (j=0; j<jend; j++) { 3630 if (*jj < rstart || *jj >= rend) olen++; 3631 else dlen++; 3632 jj++; 3633 } 3634 olens[i] = olen; 3635 dlens[i] = dlen; 3636 } 3637 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3638 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3639 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3640 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3641 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3642 ierr = PetscFree(dlens);CHKERRQ(ierr); 3643 } else { 3644 PetscInt ml,nl; 3645 3646 M = *newmat; 3647 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3648 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3649 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3650 /* 3651 The next 
two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3652 rather than the slower MatSetValues(). 3653 */ 3654 M->was_assembled = PETSC_TRUE; 3655 M->assembled = PETSC_FALSE; 3656 } 3657 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3658 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3659 ii = aij->i; 3660 jj = aij->j; 3661 aa = aij->a; 3662 for (i=0; i<m; i++) { 3663 row = rstart + i; 3664 nz = ii[i+1] - ii[i]; 3665 cwork = jj; jj += nz; 3666 vwork = aa; aa += nz; 3667 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3668 } 3669 3670 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3671 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3672 *newmat = M; 3673 3674 /* save submatrix used in processor for next request */ 3675 if (call == MAT_INITIAL_MATRIX) { 3676 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3677 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3678 } 3679 PetscFunctionReturn(0); 3680 } 3681 3682 #undef __FUNCT__ 3683 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3684 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3685 { 3686 PetscInt m,cstart, cend,j,nnz,i,d; 3687 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3688 const PetscInt *JJ; 3689 PetscScalar *values; 3690 PetscErrorCode ierr; 3691 3692 PetscFunctionBegin; 3693 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3694 3695 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3696 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3697 m = B->rmap->n; 3698 cstart = B->cmap->rstart; 3699 cend = B->cmap->rend; 3700 rstart = B->rmap->rstart; 3701 3702 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3703 3704 #if defined(PETSC_USE_DEBUGGING) 3705 for (i=0; i<m; i++) { 3706 nnz = Ii[i+1]- Ii[i]; 3707 JJ = J + Ii[i]; 3708 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz); 3709 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]); 3710 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3711 } 3712 #endif 3713 3714 for (i=0; i<m; i++) { 3715 nnz = Ii[i+1]- Ii[i]; 3716 JJ = J + Ii[i]; 3717 nnz_max = PetscMax(nnz_max,nnz); 3718 d = 0; 3719 for (j=0; j<nnz; j++) { 3720 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3721 } 3722 d_nnz[i] = d; 3723 o_nnz[i] = nnz - d; 3724 } 3725 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3726 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3727 3728 if (v) values = (PetscScalar*)v; 3729 else { 3730 ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr); 3731 } 3732 3733 for (i=0; i<m; i++) { 3734 ii = i + rstart; 3735 nnz = Ii[i+1]- Ii[i]; 3736 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3737 } 3738 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3739 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3740 3741 if (!v) { 3742 ierr = PetscFree(values);CHKERRQ(ierr); 3743 } 3744 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3745 PetscFunctionReturn(0); 3746 } 3747 3748 #undef __FUNCT__ 3749 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3750 /*@ 3751 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3752 (the default parallel PETSc format). 3753 3754 Collective on MPI_Comm 3755 3756 Input Parameters: 3757 + B - the matrix 3758 . i - the indices into j for the start of each local row (starts with zero) 3759 . j - the column indices for each local row (starts with zero) 3760 - v - optional values in the matrix 3761 3762 Level: developer 3763 3764 Notes: 3765 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3766 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3767 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3768 3769 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3770 3771 The format which is used for the sparse matrix input, is equivalent to a 3772 row-major ordering.. i.e for the following matrix, the input data expected is 3773 as shown: 3774 3775 1 0 0 3776 2 0 3 P0 3777 ------- 3778 4 5 6 P1 3779 3780 Process0 [P0]: rows_owned=[0,1] 3781 i = {0,1,3} [size = nrow+1 = 2+1] 3782 j = {0,0,2} [size = nz = 6] 3783 v = {1,2,3} [size = nz = 6] 3784 3785 Process1 [P1]: rows_owned=[2] 3786 i = {0,3} [size = nrow+1 = 1+1] 3787 j = {0,1,2} [size = nz = 6] 3788 v = {4,5,6} [size = nz = 6] 3789 3790 .keywords: matrix, aij, compressed row, sparse, parallel 3791 3792 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3793 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3794 @*/ 3795 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3796 { 3797 PetscErrorCode ierr; 3798 3799 PetscFunctionBegin; 3800 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3801 PetscFunctionReturn(0); 3802 } 3803 3804 #undef __FUNCT__ 3805 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3806 /*@C 3807 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3808 (the default parallel PETSc format). For good matrix assembly performance 3809 the user should preallocate the matrix storage by setting the parameters 3810 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3811 performance can be increased by more than a factor of 50. 3812 3813 Collective on MPI_Comm 3814 3815 Input Parameters: 3816 + B - the matrix 3817 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3818 (same value is used for all local rows) 3819 . d_nnz - array containing the number of nonzeros in the various rows of the 3820 DIAGONAL portion of the local submatrix (possibly different for each row) 3821 or NULL, if d_nz is used to specify the nonzero structure. 3822 The size of this array is equal to the number of local rows, i.e 'm'. 
3823 For matrices that will be factored, you must leave room for (and set) 3824 the diagonal entry even if it is zero. 3825 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3826 submatrix (same value is used for all local rows). 3827 - o_nnz - array containing the number of nonzeros in the various rows of the 3828 OFF-DIAGONAL portion of the local submatrix (possibly different for 3829 each row) or NULL, if o_nz is used to specify the nonzero 3830 structure. The size of this array is equal to the number 3831 of local rows, i.e 'm'. 3832 3833 If the *_nnz parameter is given then the *_nz parameter is ignored 3834 3835 The AIJ format (also called the Yale sparse matrix format or 3836 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3837 storage. The stored row and column indices begin with zero. 3838 See Users-Manual: ch_mat for details. 3839 3840 The parallel matrix is partitioned such that the first m0 rows belong to 3841 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3842 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3843 3844 The DIAGONAL portion of the local submatrix of a processor can be defined 3845 as the submatrix which is obtained by extraction the part corresponding to 3846 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3847 first row that belongs to the processor, r2 is the last row belonging to 3848 the this processor, and c1-c2 is range of indices of the local part of a 3849 vector suitable for applying the matrix to. This is an mxn matrix. In the 3850 common case of a square matrix, the row and column ranges are the same and 3851 the DIAGONAL part is also square. The remaining portion of the local 3852 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3853 3854 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3855 3856 You can call MatGetInfo() to get information on how effective the preallocation was; 3857 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3858 You can also run with the option -info and look for messages with the string 3859 malloc in them to see if additional memory allocation was needed. 3860 3861 Example usage: 3862 3863 Consider the following 8x8 matrix with 34 non-zero values, that is 3864 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3865 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3866 as follows: 3867 3868 .vb 3869 1 2 0 | 0 3 0 | 0 4 3870 Proc0 0 5 6 | 7 0 0 | 8 0 3871 9 0 10 | 11 0 0 | 12 0 3872 ------------------------------------- 3873 13 0 14 | 15 16 17 | 0 0 3874 Proc1 0 18 0 | 19 20 21 | 0 0 3875 0 0 0 | 22 23 0 | 24 0 3876 ------------------------------------- 3877 Proc2 25 26 27 | 0 0 28 | 29 0 3878 30 0 0 | 31 32 33 | 0 34 3879 .ve 3880 3881 This can be represented as a collection of submatrices as: 3882 3883 .vb 3884 A B C 3885 D E F 3886 G H I 3887 .ve 3888 3889 Where the submatrices A,B,C are owned by proc0, D,E,F are 3890 owned by proc1, G,H,I are owned by proc2. 3891 3892 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3893 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3894 The 'M','N' parameters are 8,8, and have the same values on all procs. 3895 3896 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3897 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3898 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 
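   Anticipating the d_nnz/o_nnz values worked out below, a minimal calling sketch for proc0 of
   the 8x8 example above (hypothetical code fragment; every process passes the arrays for its
   own rows) would be
.vb
      PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
      ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve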
3899 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3900 part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ 3901 matrix and [DF] as another SeqAIJ matrix. 3902 3903 When the d_nz, o_nz parameters are specified, d_nz storage elements are 3904 allocated for every row of the local diagonal submatrix, and o_nz 3905 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 3906 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 3907 row of the local DIAGONAL and OFF-DIAGONAL submatrices respectively. 3908 In this case, the values of d_nz,o_nz are: 3909 .vb 3910 proc0 : d_nz = 2, o_nz = 2 3911 proc1 : d_nz = 3, o_nz = 2 3912 proc2 : d_nz = 1, o_nz = 4 3913 .ve 3914 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3915 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3916 for proc2, i.e. we are using 12+15+10=37 storage locations to store 3917 34 values. 3918 3919 When the d_nnz, o_nnz parameters are specified, the storage is specified 3920 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 3921 In the above case the values for d_nnz,o_nnz are: 3922 .vb 3923 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3924 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3925 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3926 .ve 3927 Here the space allocated is the sum of all the above values, i.e. 34, and 3928 hence the preallocation is perfect. 3929 3930 Level: intermediate 3931 3932 .keywords: matrix, aij, compressed row, sparse, parallel 3933 3934 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 3935 MPIAIJ, MatGetInfo(), PetscSplitOwnership() 3936 @*/ 3937 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3938 { 3939 PetscErrorCode ierr; 3940 3941 PetscFunctionBegin; 3942 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3943 PetscValidType(B,1); 3944 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3945 PetscFunctionReturn(0); 3946 } 3947 3948 #undef __FUNCT__ 3949 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3950 /*@ 3951 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows in standard 3952 CSR format. 3953 3954 Collective on MPI_Comm 3955 3956 Input Parameters: 3957 + comm - MPI communicator 3958 . m - number of local rows (Cannot be PETSC_DECIDE) 3959 . n - This value should be the same as the local size used in creating the 3960 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3961 calculated if N is given) For square matrices n is almost always m. 3962 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3963 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3964 . i - row indices 3965 . j - column indices 3966 - a - matrix values 3967 3968 Output Parameter: 3969 . mat - the matrix 3970 3971 Level: intermediate 3972 3973 Notes: 3974 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3975 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3976 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3977 3978 The i and j indices are 0 based, and the i indices are indices into the local j array.
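   A minimal calling sketch (the names m, n, i, j and a stand for the caller's own local CSR
   data):
.vb
      Mat A;
      ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);CHKERRQ(ierr);
.ve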
3979 3980 The format which is used for the sparse matrix input, is equivalent to a 3981 row-major ordering.. i.e for the following matrix, the input data expected is 3982 as shown: 3983 3984 1 0 0 3985 2 0 3 P0 3986 ------- 3987 4 5 6 P1 3988 3989 Process0 [P0]: rows_owned=[0,1] 3990 i = {0,1,3} [size = nrow+1 = 2+1] 3991 j = {0,0,2} [size = nz = 6] 3992 v = {1,2,3} [size = nz = 6] 3993 3994 Process1 [P1]: rows_owned=[2] 3995 i = {0,3} [size = nrow+1 = 1+1] 3996 j = {0,1,2} [size = nz = 6] 3997 v = {4,5,6} [size = nz = 6] 3998 3999 .keywords: matrix, aij, compressed row, sparse, parallel 4000 4001 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4002 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4003 @*/ 4004 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4005 { 4006 PetscErrorCode ierr; 4007 4008 PetscFunctionBegin; 4009 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4010 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4011 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4012 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4013 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4014 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4015 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4016 PetscFunctionReturn(0); 4017 } 4018 4019 #undef __FUNCT__ 4020 #define __FUNCT__ "MatCreateAIJ" 4021 /*@C 4022 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4023 (the default parallel PETSc format). For good matrix assembly performance 4024 the user should preallocate the matrix storage by setting the parameters 4025 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4026 performance can be increased by more than a factor of 50. 4027 4028 Collective on MPI_Comm 4029 4030 Input Parameters: 4031 + comm - MPI communicator 4032 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4033 This value should be the same as the local size used in creating the 4034 y vector for the matrix-vector product y = Ax. 4035 . n - This value should be the same as the local size used in creating the 4036 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4037 calculated if N is given) For square matrices n is almost always m. 4038 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4039 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4040 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4041 (same value is used for all local rows) 4042 . d_nnz - array containing the number of nonzeros in the various rows of the 4043 DIAGONAL portion of the local submatrix (possibly different for each row) 4044 or NULL, if d_nz is used to specify the nonzero structure. 4045 The size of this array is equal to the number of local rows, i.e 'm'. 4046 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4047 submatrix (same value is used for all local rows). 4048 - o_nnz - array containing the number of nonzeros in the various rows of the 4049 OFF-DIAGONAL portion of the local submatrix (possibly different for 4050 each row) or NULL, if o_nz is used to specify the nonzero 4051 structure. 
The size of this array is equal to the number 4052 of local rows, i.e 'm'. 4053 4054 Output Parameter: 4055 . A - the matrix 4056 4057 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4058 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4059 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4060 4061 Notes: 4062 If the *_nnz parameter is given then the *_nz parameter is ignored 4063 4064 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4065 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4066 storage requirements for this matrix. 4067 4068 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4069 processor than it must be used on all processors that share the object for 4070 that argument. 4071 4072 The user MUST specify either the local or global matrix dimensions 4073 (possibly both). 4074 4075 The parallel matrix is partitioned across processors such that the 4076 first m0 rows belong to process 0, the next m1 rows belong to 4077 process 1, the next m2 rows belong to process 2 etc.. where 4078 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4079 values corresponding to [m x N] submatrix. 4080 4081 The columns are logically partitioned with the n0 columns belonging 4082 to 0th partition, the next n1 columns belonging to the next 4083 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4084 4085 The DIAGONAL portion of the local submatrix on any given processor 4086 is the submatrix corresponding to the rows and columns m,n 4087 corresponding to the given processor. i.e diagonal matrix on 4088 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4089 etc. The remaining portion of the local submatrix [m x (N-n)] 4090 constitute the OFF-DIAGONAL portion. The example below better 4091 illustrates this concept. 4092 4093 For a square global matrix we define each processor's diagonal portion 4094 to be its local rows and the corresponding columns (a square submatrix); 4095 each processor's off-diagonal portion encompasses the remainder of the 4096 local matrix (a rectangular submatrix). 4097 4098 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4099 4100 When calling this routine with a single process communicator, a matrix of 4101 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4102 type of communicator, use the construction mechanism: 4103 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4104 4105 By default, this format uses inodes (identical nodes) when possible. 4106 We search for consecutive rows with the same nonzero structure, thereby 4107 reusing matrix information to achieve increased efficiency. 4108 4109 Options Database Keys: 4110 + -mat_no_inode - Do not use inodes 4111 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4112 - -mat_aij_oneindex - Internally use indexing starting at 1 4113 rather than 0. Note that when calling MatSetValues(), 4114 the user still MUST index entries starting at 0! 4115 4116 4117 Example usage: 4118 4119 Consider the following 8x8 matrix with 34 non-zero values, that is 4120 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4121 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown 4122 as follows: 4123 4124 .vb 4125 1 2 0 | 0 3 0 | 0 4 4126 Proc0 0 5 6 | 7 0 0 | 8 0 4127 9 0 10 | 11 0 0 | 12 0 4128 ------------------------------------- 4129 13 0 14 | 15 16 17 | 0 0 4130 Proc1 0 18 0 | 19 20 21 | 0 0 4131 0 0 0 | 22 23 0 | 24 0 4132 ------------------------------------- 4133 Proc2 25 26 27 | 0 0 28 | 29 0 4134 30 0 0 | 31 32 33 | 0 34 4135 .ve 4136 4137 This can be represented as a collection of submatrices as: 4138 4139 .vb 4140 A B C 4141 D E F 4142 G H I 4143 .ve 4144 4145 Where the submatrices A,B,C are owned by proc0, D,E,F are 4146 owned by proc1, G,H,I are owned by proc2. 4147 4148 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4149 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4150 The 'M','N' parameters are 8,8, and have the same values on all procs. 4151 4152 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4153 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4154 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4155 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4156 part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ 4157 matrix and [DF] as another SeqAIJ matrix. 4158 4159 When the d_nz, o_nz parameters are specified, d_nz storage elements are 4160 allocated for every row of the local diagonal submatrix, and o_nz 4161 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4162 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 4163 row of the local DIAGONAL and OFF-DIAGONAL submatrices respectively. 4164 In this case, the values of d_nz,o_nz are: 4165 .vb 4166 proc0 : d_nz = 2, o_nz = 2 4167 proc1 : d_nz = 3, o_nz = 2 4168 proc2 : d_nz = 1, o_nz = 4 4169 .ve 4170 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4171 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4172 for proc2, i.e. we are using 12+15+10=37 storage locations to store 4173 34 values. 4174 4175 When the d_nnz, o_nnz parameters are specified, the storage is specified 4176 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 4177 In the above case the values for d_nnz,o_nnz are: 4178 .vb 4179 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4180 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4181 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4182 .ve 4183 Here the space allocated is the sum of all the above values, i.e. 34, and 4184 hence the preallocation is perfect.
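   A minimal calling sketch for the example above (hypothetical code fragment; every process
   passes its own m, n, d_nnz and o_nnz):
.vb
      Mat A;
      ierr = MatCreateAIJ(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve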
4185 4186 Level: intermediate 4187 4188 .keywords: matrix, aij, compressed row, sparse, parallel 4189 4190 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4191 MPIAIJ, MatCreateMPIAIJWithArrays() 4192 @*/ 4193 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4194 { 4195 PetscErrorCode ierr; 4196 PetscMPIInt size; 4197 4198 PetscFunctionBegin; 4199 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4200 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4201 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4202 if (size > 1) { 4203 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4204 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4205 } else { 4206 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4207 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4208 } 4209 PetscFunctionReturn(0); 4210 } 4211 4212 #undef __FUNCT__ 4213 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4214 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4215 { 4216 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4217 4218 PetscFunctionBegin; 4219 if (Ad) *Ad = a->A; 4220 if (Ao) *Ao = a->B; 4221 if (colmap) *colmap = a->garray; 4222 PetscFunctionReturn(0); 4223 } 4224 4225 #undef __FUNCT__ 4226 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4227 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4228 { 4229 PetscErrorCode ierr; 4230 PetscInt i; 4231 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4232 4233 PetscFunctionBegin; 4234 if (coloring->ctype == IS_COLORING_GLOBAL) { 4235 ISColoringValue *allcolors,*colors; 4236 ISColoring ocoloring; 4237 4238 /* set coloring for diagonal portion */ 4239 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4240 4241 /* set coloring for off-diagonal portion */ 4242 ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr); 4243 ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr); 4244 for (i=0; i<a->B->cmap->n; i++) { 4245 colors[i] = allcolors[a->garray[i]]; 4246 } 4247 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4248 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4249 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4250 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4251 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4252 ISColoringValue *colors; 4253 PetscInt *larray; 4254 ISColoring ocoloring; 4255 4256 /* set coloring for diagonal portion */ 4257 ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr); 4258 for (i=0; i<a->A->cmap->n; i++) { 4259 larray[i] = i + A->cmap->rstart; 4260 } 4261 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr); 4262 ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr); 4263 for (i=0; i<a->A->cmap->n; i++) { 4264 colors[i] = coloring->colors[larray[i]]; 4265 } 4266 ierr = PetscFree(larray);CHKERRQ(ierr); 4267 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4268 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4269 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4270 4271 /* set coloring for off-diagonal portion */ 4272 ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr); 4273 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr); 4274 ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr); 4275 for (i=0; i<a->B->cmap->n; i++) { 4276 colors[i] = coloring->colors[larray[i]]; 4277 } 4278 ierr = PetscFree(larray);CHKERRQ(ierr); 4279 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4280 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4281 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4282 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4283 PetscFunctionReturn(0); 4284 } 4285 4286 #undef __FUNCT__ 4287 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4288 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4289 { 4290 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4291 PetscErrorCode ierr; 4292 4293 PetscFunctionBegin; 4294 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4295 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4296 PetscFunctionReturn(0); 4297 } 4298 4299 #undef __FUNCT__ 4300 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4301 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4302 { 4303 PetscErrorCode ierr; 4304 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4305 PetscInt *indx; 4306 4307 PetscFunctionBegin; 4308 /* This routine will ONLY return MPIAIJ type matrix */ 4309 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4310 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4311 if (n == PETSC_DECIDE) { 4312 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4313 } 4314 /* Check sum(n) = N */ 4315 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4316 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4317 4318 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4319 rstart -= m; 4320 4321 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4322 for (i=0; i<m; i++) { 4323 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4324 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4325 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4326 } 4327 4328 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4329 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4330 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4331 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 4332 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4333 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4334 PetscFunctionReturn(0); 4335 } 4336 4337 #undef __FUNCT__ 4338 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4339 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4340 { 4341 PetscErrorCode ierr; 4342 PetscInt m,N,i,rstart,nnz,Ii; 4343 PetscInt *indx; 4344 PetscScalar *values; 4345 4346 PetscFunctionBegin; 4347 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4348 ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr); 4349 for (i=0; i<m; i++) { 4350 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4351 Ii = i + rstart; 4352 ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4353 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4354 } 4355 ierr = 
MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4356 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4357 PetscFunctionReturn(0); 4358 } 4359 4360 #undef __FUNCT__ 4361 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4362 /*@ 4363 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4364 matrices from each processor 4365 4366 Collective on MPI_Comm 4367 4368 Input Parameters: 4369 + comm - the communicator the parallel matrix will live on 4370 . inmat - the input sequential matrix on each process 4371 . n - number of local columns (or PETSC_DECIDE) 4372 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4373 4374 Output Parameter: 4375 . outmat - the parallel matrix generated 4376 4377 Level: advanced 4378 4379 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4380 4381 @*/ 4382 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4383 { 4384 PetscErrorCode ierr; 4385 PetscMPIInt size; 4386 4387 PetscFunctionBegin; 4388 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4389 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4390 if (size == 1) { 4391 if (scall == MAT_INITIAL_MATRIX) { 4392 ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr); 4393 } else { 4394 ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4395 } 4396 } else { 4397 if (scall == MAT_INITIAL_MATRIX) { 4398 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4399 } 4400 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4401 } 4402 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4403 PetscFunctionReturn(0); 4404 } 4405 4406 #undef __FUNCT__ 4407 #define __FUNCT__ "MatFileSplit" 4408 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4409 { 4410 PetscErrorCode ierr; 4411 PetscMPIInt rank; 4412 PetscInt m,N,i,rstart,nnz; 4413 size_t len; 4414 const PetscInt *indx; 4415 PetscViewer out; 4416 char *name; 4417 Mat B; 4418 const PetscScalar *values; 4419 4420 PetscFunctionBegin; 4421 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4422 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4423 /* Should this be the type of the diagonal block of A? 
*/ 4424 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4425 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4426 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4427 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4428 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4429 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4430 for (i=0; i<m; i++) { 4431 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4432 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4433 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4434 } 4435 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4436 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4437 4438 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4439 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4440 ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr); 4441 sprintf(name,"%s.%d",outfile,rank); 4442 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4443 ierr = PetscFree(name);CHKERRQ(ierr); 4444 ierr = MatView(B,out);CHKERRQ(ierr); 4445 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4446 ierr = MatDestroy(&B);CHKERRQ(ierr); 4447 PetscFunctionReturn(0); 4448 } 4449 4450 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4451 #undef __FUNCT__ 4452 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4453 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4454 { 4455 PetscErrorCode ierr; 4456 Mat_Merge_SeqsToMPI *merge; 4457 PetscContainer container; 4458 4459 PetscFunctionBegin; 4460 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4461 if (container) { 4462 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4463 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4464 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4465 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4466 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4467 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4468 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4469 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4470 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4471 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4472 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4473 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4474 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4475 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4476 ierr = PetscFree(merge);CHKERRQ(ierr); 4477 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4478 } 4479 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4480 PetscFunctionReturn(0); 4481 } 4482 4483 #include <../src/mat/utils/freespace.h> 4484 #include <petscbt.h> 4485 4486 #undef __FUNCT__ 4487 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4488 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4489 { 4490 PetscErrorCode ierr; 4491 MPI_Comm comm; 4492 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4493 PetscMPIInt size,rank,taga,*len_s; 4494 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4495 PetscInt proc,m; 4496 PetscInt **buf_ri,**buf_rj; 4497 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4498 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4499 MPI_Request *s_waits,*r_waits; 4500 MPI_Status *status; 4501 MatScalar *aa=a->a; 4502 MatScalar **abuf_r,*ba_i; 4503 Mat_Merge_SeqsToMPI *merge; 4504 PetscContainer container; 4505 4506 PetscFunctionBegin; 4507 ierr = 
PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4508 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4509 4510 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4511 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4512 4513 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4514 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4515 4516 bi = merge->bi; 4517 bj = merge->bj; 4518 buf_ri = merge->buf_ri; 4519 buf_rj = merge->buf_rj; 4520 4521 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4522 owners = merge->rowmap->range; 4523 len_s = merge->len_s; 4524 4525 /* send and recv matrix values */ 4526 /*-----------------------------*/ 4527 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4528 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4529 4530 ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr); 4531 for (proc=0,k=0; proc<size; proc++) { 4532 if (!len_s[proc]) continue; 4533 i = owners[proc]; 4534 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4535 k++; 4536 } 4537 4538 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4539 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4540 ierr = PetscFree(status);CHKERRQ(ierr); 4541 4542 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4543 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4544 4545 /* insert mat values of mpimat */ 4546 /*----------------------------*/ 4547 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4548 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4549 4550 for (k=0; k<merge->nrecv; k++) { 4551 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4552 nrows = *(buf_ri_k[k]); 4553 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4554 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4555 } 4556 4557 /* set values of ba */ 4558 m = merge->rowmap->n; 4559 for (i=0; i<m; i++) { 4560 arow = owners[rank] + i; 4561 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4562 bnzi = bi[i+1] - bi[i]; 4563 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4564 4565 /* add local non-zero vals of this proc's seqmat into ba */ 4566 anzi = ai[arow+1] - ai[arow]; 4567 aj = a->j + ai[arow]; 4568 aa = a->a + ai[arow]; 4569 nextaj = 0; 4570 for (j=0; nextaj<anzi; j++) { 4571 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4572 ba_i[j] += aa[nextaj++]; 4573 } 4574 } 4575 4576 /* add received vals into ba */ 4577 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4578 /* i-th row */ 4579 if (i == *nextrow[k]) { 4580 anzi = *(nextai[k]+1) - *nextai[k]; 4581 aj = buf_rj[k] + *(nextai[k]); 4582 aa = abuf_r[k] + *(nextai[k]); 4583 nextaj = 0; 4584 for (j=0; nextaj<anzi; j++) { 4585 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4586 ba_i[j] += aa[nextaj++]; 4587 } 4588 } 4589 nextrow[k]++; nextai[k]++; 4590 } 4591 } 4592 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4593 } 4594 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4595 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4596 4597 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4598 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4599 ierr = 
PetscFree(ba_i);CHKERRQ(ierr); 4600 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4601 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4602 PetscFunctionReturn(0); 4603 } 4604 4605 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4606 4607 #undef __FUNCT__ 4608 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4609 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4610 { 4611 PetscErrorCode ierr; 4612 Mat B_mpi; 4613 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4614 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4615 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4616 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4617 PetscInt len,proc,*dnz,*onz,bs,cbs; 4618 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4619 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4620 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4621 MPI_Status *status; 4622 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4623 PetscBT lnkbt; 4624 Mat_Merge_SeqsToMPI *merge; 4625 PetscContainer container; 4626 4627 PetscFunctionBegin; 4628 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4629 4630 /* make sure it is a PETSc comm */ 4631 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4632 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4633 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4634 4635 ierr = PetscNew(&merge);CHKERRQ(ierr); 4636 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4637 4638 /* determine row ownership */ 4639 /*---------------------------------------------------------*/ 4640 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4641 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4642 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4643 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4644 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4645 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4646 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4647 4648 m = merge->rowmap->n; 4649 owners = merge->rowmap->range; 4650 4651 /* determine the number of messages to send, their lengths */ 4652 /*---------------------------------------------------------*/ 4653 len_s = merge->len_s; 4654 4655 len = 0; /* length of buf_si[] */ 4656 merge->nsend = 0; 4657 for (proc=0; proc<size; proc++) { 4658 len_si[proc] = 0; 4659 if (proc == rank) { 4660 len_s[proc] = 0; 4661 } else { 4662 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4663 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4664 } 4665 if (len_s[proc]) { 4666 merge->nsend++; 4667 nrows = 0; 4668 for (i=owners[proc]; i<owners[proc+1]; i++) { 4669 if (ai[i+1] > ai[i]) nrows++; 4670 } 4671 len_si[proc] = 2*(nrows+1); 4672 len += len_si[proc]; 4673 } 4674 } 4675 4676 /* determine the number and length of messages to receive for ij-structure */ 4677 /*-------------------------------------------------------------------------*/ 4678 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4679 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4680 4681 /* post the Irecv of j-structure */ 4682 /*-------------------------------*/ 4683 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4684 ierr = 
PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4685 4686 /* post the Isend of j-structure */ 4687 /*--------------------------------*/ 4688 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4689 4690 for (proc=0, k=0; proc<size; proc++) { 4691 if (!len_s[proc]) continue; 4692 i = owners[proc]; 4693 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4694 k++; 4695 } 4696 4697 /* receives and sends of j-structure are complete */ 4698 /*------------------------------------------------*/ 4699 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4700 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4701 4702 /* send and recv i-structure */ 4703 /*---------------------------*/ 4704 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4705 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4706 4707 ierr = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr); 4708 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4709 for (proc=0,k=0; proc<size; proc++) { 4710 if (!len_s[proc]) continue; 4711 /* form outgoing message for i-structure: 4712 buf_si[0]: nrows to be sent 4713 [1:nrows]: row index (global) 4714 [nrows+1:2*nrows+1]: i-structure index 4715 */ 4716 /*-------------------------------------------*/ 4717 nrows = len_si[proc]/2 - 1; 4718 buf_si_i = buf_si + nrows+1; 4719 buf_si[0] = nrows; 4720 buf_si_i[0] = 0; 4721 nrows = 0; 4722 for (i=owners[proc]; i<owners[proc+1]; i++) { 4723 anzi = ai[i+1] - ai[i]; 4724 if (anzi) { 4725 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4726 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4727 nrows++; 4728 } 4729 } 4730 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4731 k++; 4732 buf_si += len_si[proc]; 4733 } 4734 4735 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4736 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4737 4738 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4739 for (i=0; i<merge->nrecv; i++) { 4740 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4741 } 4742 4743 ierr = PetscFree(len_si);CHKERRQ(ierr); 4744 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4745 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4746 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4747 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4748 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4749 ierr = PetscFree(status);CHKERRQ(ierr); 4750 4751 /* compute a local seq matrix in each processor */ 4752 /*----------------------------------------------*/ 4753 /* allocate bi array and free space for accumulating nonzero column info */ 4754 ierr = PetscMalloc1((m+1),&bi);CHKERRQ(ierr); 4755 bi[0] = 0; 4756 4757 /* create and initialize a linked list */ 4758 nlnk = N+1; 4759 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4760 4761 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4762 len = ai[owners[rank+1]] - ai[owners[rank]]; 4763 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4764 4765 current_space = free_space; 4766 4767 /* determine symbolic info for each local row */ 4768 ierr = 
PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4769 4770 for (k=0; k<merge->nrecv; k++) { 4771 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4772 nrows = *buf_ri_k[k]; 4773 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4774 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4775 } 4776 4777 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4778 len = 0; 4779 for (i=0; i<m; i++) { 4780 bnzi = 0; 4781 /* add local non-zero cols of this proc's seqmat into lnk */ 4782 arow = owners[rank] + i; 4783 anzi = ai[arow+1] - ai[arow]; 4784 aj = a->j + ai[arow]; 4785 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4786 bnzi += nlnk; 4787 /* add received col data into lnk */ 4788 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4789 if (i == *nextrow[k]) { /* i-th row */ 4790 anzi = *(nextai[k]+1) - *nextai[k]; 4791 aj = buf_rj[k] + *nextai[k]; 4792 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4793 bnzi += nlnk; 4794 nextrow[k]++; nextai[k]++; 4795 } 4796 } 4797 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4798 4799 /* if free space is not available, make more free space */ 4800 if (current_space->local_remaining<bnzi) { 4801 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr); 4802 nspacedouble++; 4803 } 4804 /* copy data into free space, then initialize lnk */ 4805 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4806 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4807 4808 current_space->array += bnzi; 4809 current_space->local_used += bnzi; 4810 current_space->local_remaining -= bnzi; 4811 4812 bi[i+1] = bi[i] + bnzi; 4813 } 4814 4815 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4816 4817 ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr); 4818 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4819 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4820 4821 /* create symbolic parallel matrix B_mpi */ 4822 /*---------------------------------------*/ 4823 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4824 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4825 if (n==PETSC_DECIDE) { 4826 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4827 } else { 4828 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4829 } 4830 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4831 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4832 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4833 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4834 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4835 4836 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4837 B_mpi->assembled = PETSC_FALSE; 4838 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4839 merge->bi = bi; 4840 merge->bj = bj; 4841 merge->buf_ri = buf_ri; 4842 merge->buf_rj = buf_rj; 4843 merge->coi = NULL; 4844 merge->coj = NULL; 4845 merge->owners_co = NULL; 4846 4847 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4848 4849 /* attach the supporting struct to B_mpi for reuse */ 4850 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4851 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4852 ierr = 
PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4853 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4854 *mpimat = B_mpi; 4855 4856 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4857 PetscFunctionReturn(0); 4858 } 4859 4860 #undef __FUNCT__ 4861 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4862 /*@C 4863 MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding sequential 4864 matrices from each processor 4865 4866 Collective on MPI_Comm 4867 4868 Input Parameters: 4869 + comm - the communicator the parallel matrix will live on 4870 . seqmat - the input sequential matrix on each process 4871 . m - number of local rows (or PETSC_DECIDE) 4872 . n - number of local columns (or PETSC_DECIDE) 4873 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4874 4875 Output Parameter: 4876 . mpimat - the parallel matrix generated 4877 4878 Level: advanced 4879 4880 Notes: 4881 The dimensions of the sequential matrix in each processor MUST be the same. 4882 The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be 4883 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4884 @*/ 4885 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4886 { 4887 PetscErrorCode ierr; 4888 PetscMPIInt size; 4889 4890 PetscFunctionBegin; 4891 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4892 if (size == 1) { 4893 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4894 if (scall == MAT_INITIAL_MATRIX) { 4895 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4896 } else { 4897 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4898 } 4899 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4900 PetscFunctionReturn(0); 4901 } 4902 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4903 if (scall == MAT_INITIAL_MATRIX) { 4904 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4905 } 4906 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4907 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4908 PetscFunctionReturn(0); 4909 } 4910 4911 #undef __FUNCT__ 4912 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4913 /*@ 4914 MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4915 mlocal rows and n columns, where mlocal is the local row count obtained with MatGetLocalSize() and n is the global column count obtained 4916 with MatGetSize() 4917 4918 Not Collective 4919 4920 Input Parameters: 4921 + A - the matrix 4922 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4923 4924 Output Parameter: 4925 . 
A_loc - the local sequential matrix generated 4926 4927 Level: developer 4928 4929 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed() 4930 4931 @*/ 4932 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4933 { 4934 PetscErrorCode ierr; 4935 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4936 Mat_SeqAIJ *mat,*a,*b; 4937 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4938 MatScalar *aa,*ba,*cam; 4939 PetscScalar *ca; 4940 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4941 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4942 PetscBool match; 4943 MPI_Comm comm; 4944 PetscMPIInt size; 4945 4946 PetscFunctionBegin; 4947 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4948 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4949 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4950 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4951 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 4952 4953 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4954 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4955 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4956 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4957 aa = a->a; ba = b->a; 4958 if (scall == MAT_INITIAL_MATRIX) { 4959 if (size == 1) { 4960 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 4961 PetscFunctionReturn(0); 4962 } 4963 4964 ierr = PetscMalloc1((1+am),&ci);CHKERRQ(ierr); 4965 ci[0] = 0; 4966 for (i=0; i<am; i++) { 4967 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4968 } 4969 ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr); 4970 ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr); 4971 k = 0; 4972 for (i=0; i<am; i++) { 4973 ncols_o = bi[i+1] - bi[i]; 4974 ncols_d = ai[i+1] - ai[i]; 4975 /* off-diagonal portion of A */ 4976 for (jo=0; jo<ncols_o; jo++) { 4977 col = cmap[*bj]; 4978 if (col >= cstart) break; 4979 cj[k] = col; bj++; 4980 ca[k++] = *ba++; 4981 } 4982 /* diagonal portion of A */ 4983 for (j=0; j<ncols_d; j++) { 4984 cj[k] = cstart + *aj++; 4985 ca[k++] = *aa++; 4986 } 4987 /* off-diagonal portion of A */ 4988 for (j=jo; j<ncols_o; j++) { 4989 cj[k] = cmap[*bj++]; 4990 ca[k++] = *ba++; 4991 } 4992 } 4993 /* put together the new matrix */ 4994 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4995 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4996 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 4997 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4998 mat->free_a = PETSC_TRUE; 4999 mat->free_ij = PETSC_TRUE; 5000 mat->nonew = 0; 5001 } else if (scall == MAT_REUSE_MATRIX) { 5002 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5003 ci = mat->i; cj = mat->j; cam = mat->a; 5004 for (i=0; i<am; i++) { 5005 /* off-diagonal portion of A */ 5006 ncols_o = bi[i+1] - bi[i]; 5007 for (jo=0; jo<ncols_o; jo++) { 5008 col = cmap[*bj]; 5009 if (col >= cstart) break; 5010 *cam++ = *ba++; bj++; 5011 } 5012 /* diagonal portion of A */ 5013 ncols_d = ai[i+1] - ai[i]; 5014 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5015 /* off-diagonal portion of A */ 5016 for (j=jo; j<ncols_o; j++) { 5017 *cam++ = *ba++; bj++; 5018 } 5019 } 5020 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5021 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5022 PetscFunctionReturn(0); 5023 } 5024 5025 #undef __FUNCT__ 5026 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5027 /*@C 5028 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5029 5030 Not Collective 5031 5032 Input Parameters: 5033 + A - the matrix 5034 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5035 - row, col - index sets of rows and columns to extract (or NULL) 5036 5037 Output Parameter: 5038 . A_loc - the local sequential matrix generated 5039 5040 Level: developer 5041 5042 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5043 5044 @*/ 5045 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5046 { 5047 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5048 PetscErrorCode ierr; 5049 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5050 IS isrowa,iscola; 5051 Mat *aloc; 5052 PetscBool match; 5053 5054 PetscFunctionBegin; 5055 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5056 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5057 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5058 if (!row) { 5059 start = A->rmap->rstart; end = A->rmap->rend; 5060 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5061 } else { 5062 isrowa = *row; 5063 } 5064 if (!col) { 5065 start = A->cmap->rstart; 5066 cmap = a->garray; 5067 nzA = a->A->cmap->n; 5068 nzB = a->B->cmap->n; 5069 ierr = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr); 5070 ncols = 0; 5071 for (i=0; i<nzB; i++) { 5072 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5073 else break; 5074 } 5075 imark = i; 5076 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5077 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5078 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5079 } else { 5080 iscola = *col; 5081 } 5082 if (scall != MAT_INITIAL_MATRIX) { 5083 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5084 aloc[0] = *A_loc; 5085 } 5086 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5087 *A_loc = aloc[0]; 5088 ierr = PetscFree(aloc);CHKERRQ(ierr); 5089 if (!row) { 5090 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5091 } 5092 if (!col) { 5093 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5094 } 5095 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5096 PetscFunctionReturn(0); 5097 } 5098 5099 #undef __FUNCT__ 5100 #define __FUNCT__ "MatGetBrowsOfAcols" 5101 /*@C 5102 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to 
nonzero columns of local A 5103 5104 Collective on Mat 5105 5106 Input Parameters: 5107 + A,B - the matrices in mpiaij format 5108 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5109 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5110 5111 Output Parameter: 5112 + rowb, colb - index sets of rows and columns of B to extract 5113 - B_seq - the sequential matrix generated 5114 5115 Level: developer 5116 5117 @*/ 5118 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5119 { 5120 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5121 PetscErrorCode ierr; 5122 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5123 IS isrowb,iscolb; 5124 Mat *bseq=NULL; 5125 5126 PetscFunctionBegin; 5127 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5128 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5129 } 5130 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5131 5132 if (scall == MAT_INITIAL_MATRIX) { 5133 start = A->cmap->rstart; 5134 cmap = a->garray; 5135 nzA = a->A->cmap->n; 5136 nzB = a->B->cmap->n; 5137 ierr = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr); 5138 ncols = 0; 5139 for (i=0; i<nzB; i++) { /* row < local row index */ 5140 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5141 else break; 5142 } 5143 imark = i; 5144 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5145 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5146 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5147 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5148 } else { 5149 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5150 isrowb = *rowb; iscolb = *colb; 5151 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5152 bseq[0] = *B_seq; 5153 } 5154 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5155 *B_seq = bseq[0]; 5156 ierr = PetscFree(bseq);CHKERRQ(ierr); 5157 if (!rowb) { 5158 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5159 } else { 5160 *rowb = isrowb; 5161 } 5162 if (!colb) { 5163 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5164 } else { 5165 *colb = iscolb; 5166 } 5167 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5168 PetscFunctionReturn(0); 5169 } 5170 5171 #undef __FUNCT__ 5172 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5173 /* 5174 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5175 of the OFF-DIAGONAL portion of local A 5176 5177 Collective on Mat 5178 5179 Input Parameters: 5180 + A,B - the matrices in mpiaij format 5181 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5182 5183 Output Parameter: 5184 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5185 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5186 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5187 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5188 5189 Level: developer 5190 5191 */ 5192 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5193 { 5194 VecScatter_MPI_General *gen_to,*gen_from; 5195 PetscErrorCode ierr; 5196 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5197 Mat_SeqAIJ *b_oth; 5198 VecScatter ctx =a->Mvctx; 5199 MPI_Comm comm; 5200 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5201 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5202 PetscScalar *rvalues,*svalues; 5203 MatScalar *b_otha,*bufa,*bufA; 5204 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5205 MPI_Request *rwaits = NULL,*swaits = NULL; 5206 MPI_Status *sstatus,rstatus; 5207 PetscMPIInt jj,size; 5208 PetscInt *cols,sbs,rbs; 5209 PetscScalar *vals; 5210 5211 PetscFunctionBegin; 5212 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5213 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5214 if (size == 1) PetscFunctionReturn(0); 5215 5216 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5217 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5218 } 5219 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5220 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5221 5222 gen_to = (VecScatter_MPI_General*)ctx->todata; 5223 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5224 rvalues = gen_from->values; /* holds the length of receiving row */ 5225 svalues = gen_to->values; /* holds the length of sending row */ 5226 nrecvs = gen_from->n; 5227 nsends = gen_to->n; 5228 5229 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5230 srow = gen_to->indices; /* local row index to be sent */ 5231 sstarts = gen_to->starts; 5232 sprocs = gen_to->procs; 5233 sstatus = gen_to->sstatus; 5234 sbs = gen_to->bs; 5235 rstarts = gen_from->starts; 5236 rprocs = gen_from->procs; 5237 rbs = gen_from->bs; 5238 5239 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5240 if (scall == MAT_INITIAL_MATRIX) { 5241 /* i-array */ 5242 /*---------*/ 5243 /* post receives */ 5244 for (i=0; i<nrecvs; i++) { 5245 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5246 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5247 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5248 } 5249 5250 /* pack the outgoing message */ 5251 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5252 5253 sstartsj[0] = 0; 5254 rstartsj[0] = 0; 5255 len = 0; /* total length of j or a array to be sent */ 5256 k = 0; 5257 for (i=0; i<nsends; i++) { 5258 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5259 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5260 for (j=0; j<nrows; j++) { 5261 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5262 for (l=0; l<sbs; l++) { 5263 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5264 5265 rowlen[j*sbs+l] = ncols; 5266 5267 len += ncols; 5268 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5269 } 5270 k++; 5271 } 5272 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5273 5274 
sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5275 } 5276 /* recvs and sends of i-array are completed */ 5277 i = nrecvs; 5278 while (i--) { 5279 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5280 } 5281 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5282 5283 /* allocate buffers for sending j and a arrays */ 5284 ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr); 5285 ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr); 5286 5287 /* create i-array of B_oth */ 5288 ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr); 5289 5290 b_othi[0] = 0; 5291 len = 0; /* total length of j or a array to be received */ 5292 k = 0; 5293 for (i=0; i<nrecvs; i++) { 5294 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5295 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 5296 for (j=0; j<nrows; j++) { 5297 b_othi[k+1] = b_othi[k] + rowlen[j]; 5298 len += rowlen[j]; k++; 5299 } 5300 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5301 } 5302 5303 /* allocate space for j and a arrays of B_oth */ 5304 ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr); 5305 ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr); 5306 5307 /* j-array */ 5308 /*---------*/ 5309 /* post receives of j-array */ 5310 for (i=0; i<nrecvs; i++) { 5311 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5312 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5313 } 5314 5315 /* pack the outgoing message j-array */ 5316 k = 0; 5317 for (i=0; i<nsends; i++) { 5318 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5319 bufJ = bufj+sstartsj[i]; 5320 for (j=0; j<nrows; j++) { 5321 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5322 for (ll=0; ll<sbs; ll++) { 5323 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5324 for (l=0; l<ncols; l++) { 5325 *bufJ++ = cols[l]; 5326 } 5327 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5328 } 5329 } 5330 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5331 } 5332 5333 /* recvs and sends of j-array are completed */ 5334 i = nrecvs; 5335 while (i--) { 5336 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5337 } 5338 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5339 } else if (scall == MAT_REUSE_MATRIX) { 5340 sstartsj = *startsj_s; 5341 rstartsj = *startsj_r; 5342 bufa = *bufa_ptr; 5343 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5344 b_otha = b_oth->a; 5345 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Invalid MatReuse, must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX"); 5346 5347 /* a-array */ 5348 /*---------*/ 5349 /* post receives of a-array */ 5350 for (i=0; i<nrecvs; i++) { 5351 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5352 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5353 } 5354 5355 /* pack the outgoing message a-array */ 5356 k = 0; 5357 for (i=0; i<nsends; i++) { 5358 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5359 bufA = bufa+sstartsj[i]; 5360 for (j=0; j<nrows; j++) { 5361 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5362 for (ll=0; ll<sbs; ll++) { 5363 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5364 for (l=0; l<ncols; l++) { 5365 *bufA++ = vals[l]; 5366 } 5367 ierr = 
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5368 } 5369 } 5370 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5371 } 5372 /* recvs and sends of a-array are completed */ 5373 i = nrecvs; 5374 while (i--) { 5375 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5376 } 5377 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5378 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5379 5380 if (scall == MAT_INITIAL_MATRIX) { 5381 /* put together the new matrix */ 5382 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5383 5384 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5385 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5386 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5387 b_oth->free_a = PETSC_TRUE; 5388 b_oth->free_ij = PETSC_TRUE; 5389 b_oth->nonew = 0; 5390 5391 ierr = PetscFree(bufj);CHKERRQ(ierr); 5392 if (!startsj_s || !bufa_ptr) { 5393 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5394 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5395 } else { 5396 *startsj_s = sstartsj; 5397 *startsj_r = rstartsj; 5398 *bufa_ptr = bufa; 5399 } 5400 } 5401 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5402 PetscFunctionReturn(0); 5403 } 5404 5405 #undef __FUNCT__ 5406 #define __FUNCT__ "MatGetCommunicationStructs" 5407 /*@C 5408 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5409 5410 Not Collective 5411 5412 Input Parameters: 5413 . A - The matrix in mpiaij format 5414 5415 Output Parameter: 5416 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5417 . 
colmap - A map from global column index to local index into lvec 5418 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5419 5420 Level: developer 5421 5422 @*/ 5423 #if defined(PETSC_USE_CTABLE) 5424 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5425 #else 5426 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5427 #endif 5428 { 5429 Mat_MPIAIJ *a; 5430 5431 PetscFunctionBegin; 5432 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5433 PetscValidPointer(lvec, 2); 5434 PetscValidPointer(colmap, 3); 5435 PetscValidPointer(multScatter, 4); 5436 a = (Mat_MPIAIJ*) A->data; 5437 if (lvec) *lvec = a->lvec; 5438 if (colmap) *colmap = a->colmap; 5439 if (multScatter) *multScatter = a->Mvctx; 5440 PetscFunctionReturn(0); 5441 } 5442 5443 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5444 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5445 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5446 5447 #undef __FUNCT__ 5448 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5449 /* 5450 Computes (B'*A')' since computing B*A directly is untenable 5451 5452 n p p 5453 ( ) ( ) ( ) 5454 m ( A ) * n ( B ) = m ( C ) 5455 ( ) ( ) ( ) 5456 5457 */ 5458 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5459 { 5460 PetscErrorCode ierr; 5461 Mat At,Bt,Ct; 5462 5463 PetscFunctionBegin; 5464 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5465 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5466 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5467 ierr = MatDestroy(&At);CHKERRQ(ierr); 5468 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5469 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5470 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5471 PetscFunctionReturn(0); 5472 } 5473 5474 #undef __FUNCT__ 5475 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5476 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5477 { 5478 PetscErrorCode ierr; 5479 PetscInt m=A->rmap->n,n=B->cmap->n; 5480 Mat Cmat; 5481 5482 PetscFunctionBegin; 5483 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5484 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5485 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5486 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5487 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5488 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5489 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5490 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5491 5492 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5493 5494 *C = Cmat; 5495 PetscFunctionReturn(0); 5496 } 5497 5498 /* ----------------------------------------------------------------*/ 5499 #undef __FUNCT__ 5500 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5501 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5502 { 5503 PetscErrorCode ierr; 5504 5505 PetscFunctionBegin; 5506 if (scall == MAT_INITIAL_MATRIX) { 5507 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5508 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5509 ierr = 
PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5510 } 5511 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5512 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5513 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5514 PetscFunctionReturn(0); 5515 } 5516 5517 #if defined(PETSC_HAVE_MUMPS) 5518 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5519 #endif 5520 #if defined(PETSC_HAVE_PASTIX) 5521 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5522 #endif 5523 #if defined(PETSC_HAVE_SUPERLU_DIST) 5524 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5525 #endif 5526 #if defined(PETSC_HAVE_CLIQUE) 5527 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*); 5528 #endif 5529 5530 /*MC 5531 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5532 5533 Options Database Keys: 5534 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5535 5536 Level: beginner 5537 5538 .seealso: MatCreateAIJ() 5539 M*/ 5540 5541 #undef __FUNCT__ 5542 #define __FUNCT__ "MatCreate_MPIAIJ" 5543 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5544 { 5545 Mat_MPIAIJ *b; 5546 PetscErrorCode ierr; 5547 PetscMPIInt size; 5548 5549 PetscFunctionBegin; 5550 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5551 5552 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5553 B->data = (void*)b; 5554 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5555 B->assembled = PETSC_FALSE; 5556 B->insertmode = NOT_SET_VALUES; 5557 b->size = size; 5558 5559 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5560 5561 /* build cache for off array entries formed */ 5562 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5563 5564 b->donotstash = PETSC_FALSE; 5565 b->colmap = 0; 5566 b->garray = 0; 5567 b->roworiented = PETSC_TRUE; 5568 5569 /* stuff used for matrix vector multiply */ 5570 b->lvec = NULL; 5571 b->Mvctx = NULL; 5572 5573 /* stuff for MatGetRow() */ 5574 b->rowindices = 0; 5575 b->rowvalues = 0; 5576 b->getrowactive = PETSC_FALSE; 5577 5578 /* flexible pointer used in CUSP/CUSPARSE classes */ 5579 b->spptr = NULL; 5580 5581 #if defined(PETSC_HAVE_MUMPS) 5582 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr); 5583 #endif 5584 #if defined(PETSC_HAVE_PASTIX) 5585 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5586 #endif 5587 #if defined(PETSC_HAVE_SUPERLU_DIST) 5588 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5589 #endif 5590 #if defined(PETSC_HAVE_CLIQUE) 5591 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr); 5592 #endif 5593 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5594 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5595 ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5596 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5597 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5598 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5599 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5600 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5601 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5602 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5603 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5604 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5605 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5606 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5607 PetscFunctionReturn(0); 5608 } 5609 5610 #undef __FUNCT__ 5611 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5612 /*@C 5613 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5614 and "off-diagonal" part of the matrix in CSR format. 5615 5616 Collective on MPI_Comm 5617 5618 Input Parameters: 5619 + comm - MPI communicator 5620 . m - number of local rows (Cannot be PETSC_DECIDE) 5621 . n - This value should be the same as the local size used in creating the 5622 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5623 calculated if N is given) For square matrices n is almost always m. 5624 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5625 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5626 . i - row indices for "diagonal" portion of matrix 5627 . j - column indices 5628 . a - matrix values 5629 . oi - row indices for "off-diagonal" portion of matrix 5630 . oj - column indices 5631 - oa - matrix values 5632 5633 Output Parameter: 5634 . mat - the matrix 5635 5636 Level: advanced 5637 5638 Notes: 5639 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5640 must free the arrays once the matrix has been destroyed and not before. 5641 5642 The i and j indices are 0 based 5643 5644 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5645 5646 This sets local rows and cannot be used to set off-processor values. 5647 5648 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5649 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5650 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5651 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5652 keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5653 communication if it is known that only local entries will be set. 
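
   Example usage (an illustrative sketch of the MatSetValues()-based assembly recommended above; the
   preallocation counts and the per-row ncols/cols/vals, as well as comm, m, n, rstart, rend and MAXCOLS,
   are application-dependent placeholders, not part of this routine's interface):
.vb
      Mat         A;
      PetscInt    row,ncols,cols[MAXCOLS];
      PetscScalar vals[MAXCOLS];

      ierr = MatCreateAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,d_nz,NULL,o_nz,NULL,&A);CHKERRQ(ierr);
      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      for (row=rstart; row<rend; row++) {
        /* fill ncols, cols[] and vals[] for this row from the application's data structure */
        ierr = MatSetValues(A,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      }
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve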
5654 5655 .keywords: matrix, aij, compressed row, sparse, parallel 5656 5657 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5658 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5659 @*/ 5660 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5661 { 5662 PetscErrorCode ierr; 5663 Mat_MPIAIJ *maij; 5664 5665 PetscFunctionBegin; 5666 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative"); 5667 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5668 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5669 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5670 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5671 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5672 maij = (Mat_MPIAIJ*) (*mat)->data; 5673 5674 (*mat)->preallocated = PETSC_TRUE; 5675 5676 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5677 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5678 5679 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5680 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5681 5682 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5683 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5684 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5685 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5686 5687 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5688 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5689 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5690 PetscFunctionReturn(0); 5691 } 5692 5693 /* 5694 Special version for direct calls from Fortran 5695 */ 5696 #include <petsc-private/fortranimpl.h> 5697 5698 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5699 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5700 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5701 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5702 #endif 5703 5704 /* Change these macros so they can be used in a void function */ 5705 #undef CHKERRQ 5706 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5707 #undef SETERRQ2 5708 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5709 #undef SETERRQ3 5710 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5711 #undef SETERRQ 5712 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5713 5714 #undef __FUNCT__ 5715 #define __FUNCT__ "matsetvaluesmpiaij_" 5716 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5717 { 5718 Mat mat = *mmat; 5719 PetscInt m = *mm, n = *mn; 5720 InsertMode addv = *maddv; 5721 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5722 PetscScalar value; 5723 PetscErrorCode ierr; 5724 5725 MatCheckPreallocated(mat,1); 5726 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5727 5728 #if defined(PETSC_USE_DEBUG) 5729 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5730 #endif 5731 { 5732 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5733 
PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5734 PetscBool roworiented = aij->roworiented; 5735 5736 /* Some Variables required in the macro */ 5737 Mat A = aij->A; 5738 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5739 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5740 MatScalar *aa = a->a; 5741 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5742 Mat B = aij->B; 5743 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5744 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5745 MatScalar *ba = b->a; 5746 5747 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5748 PetscInt nonew = a->nonew; 5749 MatScalar *ap1,*ap2; 5750 5751 PetscFunctionBegin; 5752 for (i=0; i<m; i++) { 5753 if (im[i] < 0) continue; 5754 #if defined(PETSC_USE_DEBUG) 5755 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5756 #endif 5757 if (im[i] >= rstart && im[i] < rend) { 5758 row = im[i] - rstart; 5759 lastcol1 = -1; 5760 rp1 = aj + ai[row]; 5761 ap1 = aa + ai[row]; 5762 rmax1 = aimax[row]; 5763 nrow1 = ailen[row]; 5764 low1 = 0; 5765 high1 = nrow1; 5766 lastcol2 = -1; 5767 rp2 = bj + bi[row]; 5768 ap2 = ba + bi[row]; 5769 rmax2 = bimax[row]; 5770 nrow2 = bilen[row]; 5771 low2 = 0; 5772 high2 = nrow2; 5773 5774 for (j=0; j<n; j++) { 5775 if (roworiented) value = v[i*n+j]; 5776 else value = v[i+j*m]; 5777 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5778 if (in[j] >= cstart && in[j] < cend) { 5779 col = in[j] - cstart; 5780 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5781 } else if (in[j] < 0) continue; 5782 #if defined(PETSC_USE_DEBUG) 5783 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5784 #endif 5785 else { 5786 if (mat->was_assembled) { 5787 if (!aij->colmap) { 5788 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5789 } 5790 #if defined(PETSC_USE_CTABLE) 5791 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5792 col--; 5793 #else 5794 col = aij->colmap[in[j]] - 1; 5795 #endif 5796 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5797 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5798 col = in[j]; 5799 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5800 B = aij->B; 5801 b = (Mat_SeqAIJ*)B->data; 5802 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5803 rp2 = bj + bi[row]; 5804 ap2 = ba + bi[row]; 5805 rmax2 = bimax[row]; 5806 nrow2 = bilen[row]; 5807 low2 = 0; 5808 high2 = nrow2; 5809 bm = aij->B->rmap->n; 5810 ba = b->a; 5811 } 5812 } else col = in[j]; 5813 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5814 } 5815 } 5816 } else if (!aij->donotstash) { 5817 if (roworiented) { 5818 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5819 } else { 5820 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5821 } 5822 } 5823 } 5824 } 5825 PetscFunctionReturnVoid(); 5826 } 5827 5828
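
/*
   Usage sketch for MatCreateMPIAIJSumSeqAIJ(), defined earlier in this file (illustrative only; seqmat,
   C, M, N, maxnz and comm are placeholder names, and the assembly of seqmat is left to the application).
   Each process builds a SeqAIJ matrix with identical global dimensions and the entries are summed into
   one MPIAIJ matrix:

      Mat seqmat,C;
      ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,M,N,maxnz,NULL,&seqmat);CHKERRQ(ierr);
      ...  insert this process's contributions with MatSetValues() and assemble seqmat ...
      ierr = MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
      ...  after changing numerical values of seqmat (same nonzero pattern), the sum can be refreshed ...
      ierr = MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
*/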