1 2 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 3 #include <petscblaslapack.h> 4 5 /*MC 6 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 7 8 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 9 and MATMPIAIJ otherwise. As a result, for single process communicators, 10 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 11 for communicators controlling multiple processes. It is recommended that you call both of 12 the above preallocation routines for simplicity. 13 14 Options Database Keys: 15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 16 17 Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when 18 enough exist. 19 20 Level: beginner 21 22 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ 23 M*/ 24 25 /*MC 26 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 27 28 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 29 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 30 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 31 for communicators controlling multiple processes. It is recommended that you call both of 32 the above preallocation routines for simplicity. 33 34 Options Database Keys: 35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 36 37 Level: beginner 38 39 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 40 M*/ 41 42 #undef __FUNCT__ 43 #define __FUNCT__ "MatFindNonZeroRows_MPIAIJ" 44 PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows) 45 { 46 PetscErrorCode ierr; 47 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 48 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 49 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 50 const PetscInt *ia,*ib; 51 const MatScalar *aa,*bb; 52 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 53 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 54 55 PetscFunctionBegin; 56 *keptrows = 0; 57 ia = a->i; 58 ib = b->i; 59 for (i=0; i<m; i++) { 60 na = ia[i+1] - ia[i]; 61 nb = ib[i+1] - ib[i]; 62 if (!na && !nb) { 63 cnt++; 64 goto ok1; 65 } 66 aa = a->a + ia[i]; 67 for (j=0; j<na; j++) { 68 if (aa[j] != 0.0) goto ok1; 69 } 70 bb = b->a + ib[i]; 71 for (j=0; j <nb; j++) { 72 if (bb[j] != 0.0) goto ok1; 73 } 74 cnt++; 75 ok1:; 76 } 77 ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr); 78 if (!n0rows) PetscFunctionReturn(0); 79 ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 80 cnt = 0; 81 for (i=0; i<m; i++) { 82 na = ia[i+1] - ia[i]; 83 nb = ib[i+1] - ib[i]; 84 if (!na && !nb) continue; 85 aa = a->a + ia[i]; 86 for(j=0; j<na;j++) { 87 if (aa[j] != 0.0) { 88 rows[cnt++] = rstart + i; 89 goto ok2; 90 } 91 } 92 bb = b->a + ib[i]; 93 for (j=0; j<nb; j++) { 94 if (bb[j] != 0.0) { 95 rows[cnt++] = rstart + i; 96 goto ok2; 97 } 98 } 99 ok2:; 100 } 101 ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 102 PetscFunctionReturn(0); 103 } 104 105 #undef __FUNCT__ 106 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 107 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 108 { 109 PetscErrorCode ierr; 110 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 111 PetscInt 
i,n,*garray = aij->garray; 112 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 113 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 114 PetscReal *work; 115 116 PetscFunctionBegin; 117 ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr); 118 ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr); 119 ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr); 120 if (type == NORM_2) { 121 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 122 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 123 } 124 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 125 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 126 } 127 } else if (type == NORM_1) { 128 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 129 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 130 } 131 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 132 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 133 } 134 } else if (type == NORM_INFINITY) { 135 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 136 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 137 } 138 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 139 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 140 } 141 142 } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType"); 143 if (type == NORM_INFINITY) { 144 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 145 } else { 146 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 147 } 148 ierr = PetscFree(work);CHKERRQ(ierr); 149 if (type == NORM_2) { 150 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 151 } 152 PetscFunctionReturn(0); 153 } 154 155 #undef __FUNCT__ 156 #define __FUNCT__ "MatDistribute_MPIAIJ" 157 /* 158 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 159 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 
160 161 Only for square matrices 162 */ 163 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 164 { 165 PetscMPIInt rank,size; 166 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld; 167 PetscErrorCode ierr; 168 Mat mat; 169 Mat_SeqAIJ *gmata; 170 PetscMPIInt tag; 171 MPI_Status status; 172 PetscBool aij; 173 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 174 175 PetscFunctionBegin; 176 CHKMEMQ; 177 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 178 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 179 if (!rank) { 180 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 181 if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 182 } 183 if (reuse == MAT_INITIAL_MATRIX) { 184 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 185 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 186 ierr = MatSetBlockSizes(mat,gmat->rmap->bs,gmat->cmap->bs);CHKERRQ(ierr); 187 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 188 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 189 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 190 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 191 rowners[0] = 0; 192 for (i=2; i<=size; i++) { 193 rowners[i] += rowners[i-1]; 194 } 195 rstart = rowners[rank]; 196 rend = rowners[rank+1]; 197 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 198 if (!rank) { 199 gmata = (Mat_SeqAIJ*) gmat->data; 200 /* send row lengths to all processors */ 201 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 202 for (i=1; i<size; i++) { 203 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 204 } 205 /* determine number diagonal and off-diagonal counts */ 206 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 207 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 208 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 209 jj = 0; 210 for (i=0; i<m; i++) { 211 for (j=0; j<dlens[i]; j++) { 212 if (gmata->j[jj] < rstart) ld[i]++; 213 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 214 jj++; 215 } 216 } 217 /* send column indices to other processes */ 218 for (i=1; i<size; i++) { 219 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 220 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 221 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 222 } 223 224 /* send numerical values to other processes */ 225 for (i=1; i<size; i++) { 226 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 227 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 228 } 229 gmataa = gmata->a; 230 gmataj = gmata->j; 231 232 } else { 233 /* receive row lengths */ 234 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 235 /* receive column indices */ 236 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 237 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 238 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 239 /* determine number diagonal and off-diagonal counts */ 240 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 241 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 242 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 243 jj = 0; 244 for (i=0; i<m; i++) { 245 for 
(j=0; j<dlens[i]; j++) { 246 if (gmataj[jj] < rstart) ld[i]++; 247 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 248 jj++; 249 } 250 } 251 /* receive numerical values */ 252 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 253 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 254 } 255 /* set preallocation */ 256 for (i=0; i<m; i++) { 257 dlens[i] -= olens[i]; 258 } 259 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 260 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 261 262 for (i=0; i<m; i++) { 263 dlens[i] += olens[i]; 264 } 265 cnt = 0; 266 for (i=0; i<m; i++) { 267 row = rstart + i; 268 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 269 cnt += dlens[i]; 270 } 271 if (rank) { 272 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 273 } 274 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 275 ierr = PetscFree(rowners);CHKERRQ(ierr); 276 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 277 *inmat = mat; 278 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 279 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 280 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 281 mat = *inmat; 282 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 283 if (!rank) { 284 /* send numerical values to other processes */ 285 gmata = (Mat_SeqAIJ*) gmat->data; 286 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 287 gmataa = gmata->a; 288 for (i=1; i<size; i++) { 289 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 290 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 291 } 292 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 293 } else { 294 /* receive numerical values from process 0*/ 295 nz = Ad->nz + Ao->nz; 296 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 297 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 298 } 299 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 300 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 301 ad = Ad->a; 302 ao = Ao->a; 303 if (mat->rmap->n) { 304 i = 0; 305 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 306 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 307 } 308 for (i=1; i<mat->rmap->n; i++) { 309 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 310 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 311 } 312 i--; 313 if (mat->rmap->n) { 314 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 315 } 316 if (rank) { 317 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 318 } 319 } 320 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 321 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 322 CHKMEMQ; 323 PetscFunctionReturn(0); 324 } 325 326 /* 327 Local utility routine that creates a mapping from the global column 328 number to the local number in the off-diagonal part of the local 329 storage of the matrix. 
When PETSC_USE_CTABLE is used this is scalable at 330 a slightly higher hash table cost; without it it is not scalable (each processor 331 has an order N integer array but is fast to acess. 332 */ 333 #undef __FUNCT__ 334 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 335 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 336 { 337 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 338 PetscErrorCode ierr; 339 PetscInt n = aij->B->cmap->n,i; 340 341 PetscFunctionBegin; 342 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 343 #if defined (PETSC_USE_CTABLE) 344 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 345 for (i=0; i<n; i++){ 346 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 347 } 348 #else 349 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 350 ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 351 ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 352 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 353 #endif 354 PetscFunctionReturn(0); 355 } 356 357 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 358 { \ 359 if (col <= lastcol1) low1 = 0; else high1 = nrow1; \ 360 lastcol1 = col;\ 361 while (high1-low1 > 5) { \ 362 t = (low1+high1)/2; \ 363 if (rp1[t] > col) high1 = t; \ 364 else low1 = t; \ 365 } \ 366 for (_i=low1; _i<high1; _i++) { \ 367 if (rp1[_i] > col) break; \ 368 if (rp1[_i] == col) { \ 369 if (addv == ADD_VALUES) ap1[_i] += value; \ 370 else ap1[_i] = value; \ 371 goto a_noinsert; \ 372 } \ 373 } \ 374 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 375 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 376 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 377 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 378 N = nrow1++ - 1; a->nz++; high1++; \ 379 /* shift up all the later entries in this row */ \ 380 for (ii=N; ii>=_i; ii--) { \ 381 rp1[ii+1] = rp1[ii]; \ 382 ap1[ii+1] = ap1[ii]; \ 383 } \ 384 rp1[_i] = col; \ 385 ap1[_i] = value; \ 386 a_noinsert: ; \ 387 ailen[row] = nrow1; \ 388 } 389 390 391 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 392 { \ 393 if (col <= lastcol2) low2 = 0; else high2 = nrow2; \ 394 lastcol2 = col;\ 395 while (high2-low2 > 5) { \ 396 t = (low2+high2)/2; \ 397 if (rp2[t] > col) high2 = t; \ 398 else low2 = t; \ 399 } \ 400 for (_i=low2; _i<high2; _i++) { \ 401 if (rp2[_i] > col) break; \ 402 if (rp2[_i] == col) { \ 403 if (addv == ADD_VALUES) ap2[_i] += value; \ 404 else ap2[_i] = value; \ 405 goto b_noinsert; \ 406 } \ 407 } \ 408 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 409 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 410 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 411 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 412 N = nrow2++ - 1; b->nz++; high2++; \ 413 /* shift up all the later entries in this row */ \ 414 for (ii=N; ii>=_i; ii--) { \ 415 rp2[ii+1] = rp2[ii]; \ 416 ap2[ii+1] = ap2[ii]; \ 417 } \ 418 rp2[_i] = col; \ 419 ap2[_i] = value; \ 420 b_noinsert: ; \ 421 bilen[row] = nrow2; \ 422 } 423 424 #undef __FUNCT__ 425 #define __FUNCT__ 
"MatSetValuesRow_MPIAIJ" 426 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 427 { 428 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 429 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 430 PetscErrorCode ierr; 431 PetscInt l,*garray = mat->garray,diag; 432 433 PetscFunctionBegin; 434 /* code only works for square matrices A */ 435 436 /* find size of row to the left of the diagonal part */ 437 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 438 row = row - diag; 439 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 440 if (garray[b->j[b->i[row]+l]] > diag) break; 441 } 442 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 443 444 /* diagonal part */ 445 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 446 447 /* right of diagonal part */ 448 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 449 PetscFunctionReturn(0); 450 } 451 452 #undef __FUNCT__ 453 #define __FUNCT__ "MatSetValues_MPIAIJ" 454 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 455 { 456 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 457 PetscScalar value; 458 PetscErrorCode ierr; 459 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 460 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 461 PetscBool roworiented = aij->roworiented; 462 463 /* Some Variables required in the macro */ 464 Mat A = aij->A; 465 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 466 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 467 MatScalar *aa = a->a; 468 PetscBool ignorezeroentries = a->ignorezeroentries; 469 Mat B = aij->B; 470 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 471 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 472 MatScalar *ba = b->a; 473 474 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 475 PetscInt nonew; 476 MatScalar *ap1,*ap2; 477 478 PetscFunctionBegin; 479 if (v) PetscValidScalarPointer(v,6); 480 for (i=0; i<m; i++) { 481 if (im[i] < 0) continue; 482 #if defined(PETSC_USE_DEBUG) 483 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 484 #endif 485 if (im[i] >= rstart && im[i] < rend) { 486 row = im[i] - rstart; 487 lastcol1 = -1; 488 rp1 = aj + ai[row]; 489 ap1 = aa + ai[row]; 490 rmax1 = aimax[row]; 491 nrow1 = ailen[row]; 492 low1 = 0; 493 high1 = nrow1; 494 lastcol2 = -1; 495 rp2 = bj + bi[row]; 496 ap2 = ba + bi[row]; 497 rmax2 = bimax[row]; 498 nrow2 = bilen[row]; 499 low2 = 0; 500 high2 = nrow2; 501 502 for (j=0; j<n; j++) { 503 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 504 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 505 if (in[j] >= cstart && in[j] < cend){ 506 col = in[j] - cstart; 507 nonew = a->nonew; 508 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 509 } else if (in[j] < 0) continue; 510 #if defined(PETSC_USE_DEBUG) 511 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 512 #endif 513 else { 514 if (mat->was_assembled) { 515 if (!aij->colmap) { 516 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 517 } 518 #if defined (PETSC_USE_CTABLE) 519 
ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 520 col--; 521 #else 522 col = aij->colmap[in[j]] - 1; 523 #endif 524 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 525 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 526 col = in[j]; 527 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 528 B = aij->B; 529 b = (Mat_SeqAIJ*)B->data; 530 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 531 rp2 = bj + bi[row]; 532 ap2 = ba + bi[row]; 533 rmax2 = bimax[row]; 534 nrow2 = bilen[row]; 535 low2 = 0; 536 high2 = nrow2; 537 bm = aij->B->rmap->n; 538 ba = b->a; 539 } 540 } else col = in[j]; 541 nonew = b->nonew; 542 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 543 } 544 } 545 } else { 546 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 547 if (!aij->donotstash) { 548 mat->assembled = PETSC_FALSE; 549 if (roworiented) { 550 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 551 } else { 552 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 553 } 554 } 555 } 556 } 557 PetscFunctionReturn(0); 558 } 559 560 #undef __FUNCT__ 561 #define __FUNCT__ "MatGetValues_MPIAIJ" 562 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 563 { 564 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 565 PetscErrorCode ierr; 566 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 567 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 568 569 PetscFunctionBegin; 570 for (i=0; i<m; i++) { 571 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 572 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 573 if (idxm[i] >= rstart && idxm[i] < rend) { 574 row = idxm[i] - rstart; 575 for (j=0; j<n; j++) { 576 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 577 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 578 if (idxn[j] >= cstart && idxn[j] < cend){ 579 col = idxn[j] - cstart; 580 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 581 } else { 582 if (!aij->colmap) { 583 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 584 } 585 #if defined (PETSC_USE_CTABLE) 586 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 587 col --; 588 #else 589 col = aij->colmap[idxn[j]] - 1; 590 #endif 591 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 592 else { 593 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 594 } 595 } 596 } 597 } else { 598 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 599 } 600 } 601 PetscFunctionReturn(0); 602 } 603 604 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 605 606 #undef __FUNCT__ 607 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 608 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 609 { 610 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 611 PetscErrorCode ierr; 612 PetscInt nstash,reallocs; 613 InsertMode addv; 614 615 
PetscFunctionBegin; 616 if (aij->donotstash || mat->nooffprocentries) { 617 PetscFunctionReturn(0); 618 } 619 620 /* make sure all processors are either in INSERTMODE or ADDMODE */ 621 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 622 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 623 mat->insertmode = addv; /* in case this processor had no cache */ 624 625 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 626 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 627 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 628 PetscFunctionReturn(0); 629 } 630 631 #undef __FUNCT__ 632 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 633 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 634 { 635 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 636 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data; 637 PetscErrorCode ierr; 638 PetscMPIInt n; 639 PetscInt i,j,rstart,ncols,flg; 640 PetscInt *row,*col; 641 PetscBool other_disassembled; 642 PetscScalar *val; 643 InsertMode addv = mat->insertmode; 644 645 /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */ 646 PetscFunctionBegin; 647 if (!aij->donotstash && !mat->nooffprocentries) { 648 while (1) { 649 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 650 if (!flg) break; 651 652 for (i=0; i<n;) { 653 /* Now identify the consecutive vals belonging to the same row */ 654 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 655 if (j < n) ncols = j-i; 656 else ncols = n-i; 657 /* Now assemble all these values with a single function call */ 658 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 659 i = j; 660 } 661 } 662 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 663 } 664 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 665 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 666 667 /* determine if any processor has disassembled, if so we must 668 also disassemble ourselfs, in order that we may reassemble. 
*/ 669 /* 670 if nonzero structure of submatrix B cannot change then we know that 671 no processor disassembled thus we can skip this stuff 672 */ 673 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 674 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 675 if (mat->was_assembled && !other_disassembled) { 676 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 677 } 678 } 679 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 680 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 681 } 682 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 683 ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 684 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 685 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 686 687 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 688 aij->rowvalues = 0; 689 690 /* used by MatAXPY() */ 691 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 692 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 693 694 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 695 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 696 PetscFunctionReturn(0); 697 } 698 699 #undef __FUNCT__ 700 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 701 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 702 { 703 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 704 PetscErrorCode ierr; 705 706 PetscFunctionBegin; 707 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 708 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 709 PetscFunctionReturn(0); 710 } 711 712 #undef __FUNCT__ 713 #define __FUNCT__ "MatZeroRows_MPIAIJ" 714 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 715 { 716 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 717 PetscErrorCode ierr; 718 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 719 PetscInt i,*owners = A->rmap->range; 720 PetscInt *nprocs,j,idx,nsends,row; 721 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 722 PetscInt *rvalues,count,base,slen,*source; 723 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 724 MPI_Comm comm = ((PetscObject)A)->comm; 725 MPI_Request *send_waits,*recv_waits; 726 MPI_Status recv_status,*send_status; 727 const PetscScalar *xx; 728 PetscScalar *bb; 729 #if defined(PETSC_DEBUG) 730 PetscBool found = PETSC_FALSE; 731 #endif 732 733 PetscFunctionBegin; 734 /* first count number of contributors to each processor */ 735 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 736 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 737 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 738 j = 0; 739 for (i=0; i<N; i++) { 740 if (lastidx > (idx = rows[i])) j = 0; 741 lastidx = idx; 742 for (; j<size; j++) { 743 if (idx >= owners[j] && idx < owners[j+1]) { 744 nprocs[2*j]++; 745 nprocs[2*j+1] = 1; 746 owner[i] = j; 747 #if defined(PETSC_DEBUG) 748 found = PETSC_TRUE; 749 #endif 750 break; 751 } 752 } 753 #if defined(PETSC_DEBUG) 754 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 755 found = PETSC_FALSE; 756 #endif 757 } 758 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 759 760 if (A->nooffproczerorows) { 761 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 762 nrecvs = nsends; 763 nmax = N; 764 } 
else { 765 /* inform other processors of number of messages and max length*/ 766 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 767 } 768 769 /* post receives: */ 770 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 771 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 772 for (i=0; i<nrecvs; i++) { 773 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 774 } 775 776 /* do sends: 777 1) starts[i] gives the starting index in svalues for stuff going to 778 the ith processor 779 */ 780 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 781 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 782 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 783 starts[0] = 0; 784 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 785 for (i=0; i<N; i++) { 786 svalues[starts[owner[i]]++] = rows[i]; 787 } 788 789 starts[0] = 0; 790 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 791 count = 0; 792 for (i=0; i<size; i++) { 793 if (nprocs[2*i+1]) { 794 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 795 } 796 } 797 ierr = PetscFree(starts);CHKERRQ(ierr); 798 799 base = owners[rank]; 800 801 /* wait on receives */ 802 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 803 count = nrecvs; slen = 0; 804 while (count) { 805 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 806 /* unpack receives into our local space */ 807 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 808 source[imdex] = recv_status.MPI_SOURCE; 809 lens[imdex] = n; 810 slen += n; 811 count--; 812 } 813 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 814 815 /* move the data into the send scatter */ 816 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 817 count = 0; 818 for (i=0; i<nrecvs; i++) { 819 values = rvalues + i*nmax; 820 for (j=0; j<lens[i]; j++) { 821 lrows[count++] = values[j] - base; 822 } 823 } 824 ierr = PetscFree(rvalues);CHKERRQ(ierr); 825 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 826 ierr = PetscFree(owner);CHKERRQ(ierr); 827 ierr = PetscFree(nprocs);CHKERRQ(ierr); 828 829 /* fix right hand side if needed */ 830 if (x && b) { 831 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 832 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 833 for (i=0; i<slen; i++) { 834 bb[lrows[i]] = diag*xx[lrows[i]]; 835 } 836 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 837 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 838 } 839 /* 840 Zero the required rows. If the "diagonal block" of the matrix 841 is square and the user wishes to set the diagonal we use separate 842 code so that MatSetValues() is not called for each diagonal allocating 843 new memory, thus calling lots of mallocs and slowing things down. 
844 845 */ 846 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 847 ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 848 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 849 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 850 } else if (diag != 0.0) { 851 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 852 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 853 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 854 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 855 } 856 for (i = 0; i < slen; i++) { 857 row = lrows[i] + rstart; 858 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 859 } 860 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 861 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 862 } else { 863 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 864 } 865 ierr = PetscFree(lrows);CHKERRQ(ierr); 866 867 /* wait on sends */ 868 if (nsends) { 869 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 870 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 871 ierr = PetscFree(send_status);CHKERRQ(ierr); 872 } 873 ierr = PetscFree(send_waits);CHKERRQ(ierr); 874 ierr = PetscFree(svalues);CHKERRQ(ierr); 875 PetscFunctionReturn(0); 876 } 877 878 #undef __FUNCT__ 879 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 880 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 881 { 882 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 883 PetscErrorCode ierr; 884 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 885 PetscInt i,*owners = A->rmap->range; 886 PetscInt *nprocs,j,idx,nsends; 887 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 888 PetscInt *rvalues,count,base,slen,*source; 889 PetscInt *lens,*lrows,*values,m; 890 MPI_Comm comm = ((PetscObject)A)->comm; 891 MPI_Request *send_waits,*recv_waits; 892 MPI_Status recv_status,*send_status; 893 const PetscScalar *xx; 894 PetscScalar *bb,*mask; 895 Vec xmask,lmask; 896 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 897 const PetscInt *aj, *ii,*ridx; 898 PetscScalar *aa; 899 #if defined(PETSC_DEBUG) 900 PetscBool found = PETSC_FALSE; 901 #endif 902 903 PetscFunctionBegin; 904 /* first count number of contributors to each processor */ 905 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 906 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 907 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 908 j = 0; 909 for (i=0; i<N; i++) { 910 if (lastidx > (idx = rows[i])) j = 0; 911 lastidx = idx; 912 for (; j<size; j++) { 913 if (idx >= owners[j] && idx < owners[j+1]) { 914 nprocs[2*j]++; 915 nprocs[2*j+1] = 1; 916 owner[i] = j; 917 #if defined(PETSC_DEBUG) 918 found = PETSC_TRUE; 919 #endif 920 break; 921 } 922 } 923 #if defined(PETSC_DEBUG) 924 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 925 found = PETSC_FALSE; 926 #endif 927 } 928 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 929 930 /* inform other processors of number of messages and max length*/ 931 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 932 933 /* post receives: */ 934 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 935 ierr = 
PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 936 for (i=0; i<nrecvs; i++) { 937 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 938 } 939 940 /* do sends: 941 1) starts[i] gives the starting index in svalues for stuff going to 942 the ith processor 943 */ 944 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 945 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 946 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 947 starts[0] = 0; 948 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 949 for (i=0; i<N; i++) { 950 svalues[starts[owner[i]]++] = rows[i]; 951 } 952 953 starts[0] = 0; 954 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 955 count = 0; 956 for (i=0; i<size; i++) { 957 if (nprocs[2*i+1]) { 958 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 959 } 960 } 961 ierr = PetscFree(starts);CHKERRQ(ierr); 962 963 base = owners[rank]; 964 965 /* wait on receives */ 966 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 967 count = nrecvs; slen = 0; 968 while (count) { 969 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 970 /* unpack receives into our local space */ 971 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 972 source[imdex] = recv_status.MPI_SOURCE; 973 lens[imdex] = n; 974 slen += n; 975 count--; 976 } 977 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 978 979 /* move the data into the send scatter */ 980 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 981 count = 0; 982 for (i=0; i<nrecvs; i++) { 983 values = rvalues + i*nmax; 984 for (j=0; j<lens[i]; j++) { 985 lrows[count++] = values[j] - base; 986 } 987 } 988 ierr = PetscFree(rvalues);CHKERRQ(ierr); 989 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 990 ierr = PetscFree(owner);CHKERRQ(ierr); 991 ierr = PetscFree(nprocs);CHKERRQ(ierr); 992 /* lrows are the local rows to be zeroed, slen is the number of local rows */ 993 994 /* zero diagonal part of matrix */ 995 ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr); 996 997 /* handle off diagonal part of matrix */ 998 ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr); 999 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1000 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1001 for (i=0; i<slen; i++) { 1002 bb[lrows[i]] = 1; 1003 } 1004 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1005 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1006 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1007 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1008 if (x) { 1009 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1010 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1011 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1012 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1013 } 1014 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1015 1016 /* remove zeroed rows of off diagonal matrix */ 1017 ii = aij->i; 1018 for (i=0; i<slen; i++) { 1019 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 1020 } 1021 1022 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1023 if (aij->compressedrow.use){ 1024 m = aij->compressedrow.nrows; 1025 ii = 
aij->compressedrow.i; 1026 ridx = aij->compressedrow.rindex; 1027 for (i=0; i<m; i++){ 1028 n = ii[i+1] - ii[i]; 1029 aj = aij->j + ii[i]; 1030 aa = aij->a + ii[i]; 1031 1032 for (j=0; j<n; j++) { 1033 if (PetscAbsScalar(mask[*aj])) { 1034 if (b) bb[*ridx] -= *aa*xx[*aj]; 1035 *aa = 0.0; 1036 } 1037 aa++; 1038 aj++; 1039 } 1040 ridx++; 1041 } 1042 } else { /* do not use compressed row format */ 1043 m = l->B->rmap->n; 1044 for (i=0; i<m; i++) { 1045 n = ii[i+1] - ii[i]; 1046 aj = aij->j + ii[i]; 1047 aa = aij->a + ii[i]; 1048 for (j=0; j<n; j++) { 1049 if (PetscAbsScalar(mask[*aj])) { 1050 if (b) bb[i] -= *aa*xx[*aj]; 1051 *aa = 0.0; 1052 } 1053 aa++; 1054 aj++; 1055 } 1056 } 1057 } 1058 if (x) { 1059 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1060 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1061 } 1062 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1063 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1064 ierr = PetscFree(lrows);CHKERRQ(ierr); 1065 1066 /* wait on sends */ 1067 if (nsends) { 1068 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1069 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1070 ierr = PetscFree(send_status);CHKERRQ(ierr); 1071 } 1072 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1073 ierr = PetscFree(svalues);CHKERRQ(ierr); 1074 1075 PetscFunctionReturn(0); 1076 } 1077 1078 #undef __FUNCT__ 1079 #define __FUNCT__ "MatMult_MPIAIJ" 1080 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1081 { 1082 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1083 PetscErrorCode ierr; 1084 PetscInt nt; 1085 1086 PetscFunctionBegin; 1087 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1088 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1089 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1090 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1091 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1092 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1093 PetscFunctionReturn(0); 1094 } 1095 1096 #undef __FUNCT__ 1097 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 1098 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1099 { 1100 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1101 PetscErrorCode ierr; 1102 1103 PetscFunctionBegin; 1104 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1105 PetscFunctionReturn(0); 1106 } 1107 1108 #undef __FUNCT__ 1109 #define __FUNCT__ "MatMultAdd_MPIAIJ" 1110 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1111 { 1112 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1113 PetscErrorCode ierr; 1114 1115 PetscFunctionBegin; 1116 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1117 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1118 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1119 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1120 PetscFunctionReturn(0); 1121 } 1122 1123 #undef __FUNCT__ 1124 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1125 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1126 { 1127 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1128 PetscErrorCode ierr; 1129 PetscBool merged; 1130 1131 PetscFunctionBegin; 1132 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1133 /* do nondiagonal part */ 1134 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1135 if (!merged) { 1136 /* 
send it on its way */ 1137 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1138 /* do local part */ 1139 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1140 /* receive remote parts: note this assumes the values are not actually */ 1141 /* added in yy until the next line, */ 1142 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1143 } else { 1144 /* do local part */ 1145 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1146 /* send it on its way */ 1147 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1148 /* values actually were received in the Begin() but we need to call this nop */ 1149 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1150 } 1151 PetscFunctionReturn(0); 1152 } 1153 1154 EXTERN_C_BEGIN 1155 #undef __FUNCT__ 1156 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1157 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1158 { 1159 MPI_Comm comm; 1160 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 1161 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1162 IS Me,Notme; 1163 PetscErrorCode ierr; 1164 PetscInt M,N,first,last,*notme,i; 1165 PetscMPIInt size; 1166 1167 PetscFunctionBegin; 1168 1169 /* Easy test: symmetric diagonal block */ 1170 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 1171 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1172 if (!*f) PetscFunctionReturn(0); 1173 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1174 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1175 if (size == 1) PetscFunctionReturn(0); 1176 1177 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 1178 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1179 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1180 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 1181 for (i=0; i<first; i++) notme[i] = i; 1182 for (i=last; i<M; i++) notme[i-last+first] = i; 1183 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1184 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1185 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1186 Aoff = Aoffs[0]; 1187 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1188 Boff = Boffs[0]; 1189 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1190 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1191 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1192 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1193 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1194 ierr = PetscFree(notme);CHKERRQ(ierr); 1195 PetscFunctionReturn(0); 1196 } 1197 EXTERN_C_END 1198 1199 #undef __FUNCT__ 1200 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1201 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1202 { 1203 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1204 PetscErrorCode ierr; 1205 1206 PetscFunctionBegin; 1207 /* do nondiagonal part */ 1208 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1209 /* send it on its way */ 1210 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1211 /* do local part */ 1212 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1213 /* receive remote parts */ 1214 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1215 PetscFunctionReturn(0); 1216 } 
1217 1218 /* 1219 This only works correctly for square matrices where the subblock A->A is the 1220 diagonal block 1221 */ 1222 #undef __FUNCT__ 1223 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1224 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1225 { 1226 PetscErrorCode ierr; 1227 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1228 1229 PetscFunctionBegin; 1230 if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1231 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1232 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1233 PetscFunctionReturn(0); 1234 } 1235 1236 #undef __FUNCT__ 1237 #define __FUNCT__ "MatScale_MPIAIJ" 1238 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1239 { 1240 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1241 PetscErrorCode ierr; 1242 1243 PetscFunctionBegin; 1244 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1245 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1246 PetscFunctionReturn(0); 1247 } 1248 1249 #undef __FUNCT__ 1250 #define __FUNCT__ "MatDestroy_MPIAIJ" 1251 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1252 { 1253 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1254 PetscErrorCode ierr; 1255 1256 PetscFunctionBegin; 1257 #if defined(PETSC_USE_LOG) 1258 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1259 #endif 1260 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1261 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1262 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1263 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1264 #if defined (PETSC_USE_CTABLE) 1265 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1266 #else 1267 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1268 #endif 1269 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1270 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1271 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1272 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1273 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1274 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1275 1276 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1277 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1278 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1279 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1280 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 1281 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1282 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1283 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1284 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1285 PetscFunctionReturn(0); 1286 } 1287 1288 #undef __FUNCT__ 1289 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1290 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1291 { 1292 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1293 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 1294 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 1295 PetscErrorCode ierr; 1296 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1297 int 
fd; 1298 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1299 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 1300 PetscScalar *column_values; 1301 PetscInt message_count,flowcontrolcount; 1302 1303 PetscFunctionBegin; 1304 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1305 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 1306 nz = A->nz + B->nz; 1307 if (!rank) { 1308 header[0] = MAT_FILE_CLASSID; 1309 header[1] = mat->rmap->N; 1310 header[2] = mat->cmap->N; 1311 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1312 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1313 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1314 /* get largest number of rows any processor has */ 1315 rlen = mat->rmap->n; 1316 range = mat->rmap->range; 1317 for (i=1; i<size; i++) { 1318 rlen = PetscMax(rlen,range[i+1] - range[i]); 1319 } 1320 } else { 1321 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1322 rlen = mat->rmap->n; 1323 } 1324 1325 /* load up the local row counts */ 1326 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 1327 for (i=0; i<mat->rmap->n; i++) { 1328 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1329 } 1330 1331 /* store the row lengths to the file */ 1332 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1333 if (!rank) { 1334 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1335 for (i=1; i<size; i++) { 1336 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1337 rlen = range[i+1] - range[i]; 1338 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1339 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1340 } 1341 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1342 } else { 1343 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1344 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1345 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1346 } 1347 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1348 1349 /* load up the local column indices */ 1350 nzmax = nz; /* )th processor needs space a largest processor needs */ 1351 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1352 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1353 cnt = 0; 1354 for (i=0; i<mat->rmap->n; i++) { 1355 for (j=B->i[i]; j<B->i[i+1]; j++) { 1356 if ( (col = garray[B->j[j]]) > cstart) break; 1357 column_indices[cnt++] = col; 1358 } 1359 for (k=A->i[i]; k<A->i[i+1]; k++) { 1360 column_indices[cnt++] = A->j[k] + cstart; 1361 } 1362 for (; j<B->i[i+1]; j++) { 1363 column_indices[cnt++] = garray[B->j[j]]; 1364 } 1365 } 1366 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1367 1368 /* store the column indices to the file */ 1369 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1370 if (!rank) { 1371 MPI_Status status; 1372 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1373 for (i=1; i<size; i++) { 1374 ierr = 
PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1375 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1376 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1377 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1378 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1379 } 1380 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1381 } else { 1382 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1383 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1384 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1385 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1386 } 1387 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1388 1389 /* load up the local column values */ 1390 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1391 cnt = 0; 1392 for (i=0; i<mat->rmap->n; i++) { 1393 for (j=B->i[i]; j<B->i[i+1]; j++) { 1394 if ( garray[B->j[j]] > cstart) break; 1395 column_values[cnt++] = B->a[j]; 1396 } 1397 for (k=A->i[i]; k<A->i[i+1]; k++) { 1398 column_values[cnt++] = A->a[k]; 1399 } 1400 for (; j<B->i[i+1]; j++) { 1401 column_values[cnt++] = B->a[j]; 1402 } 1403 } 1404 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1405 1406 /* store the column values to the file */ 1407 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1408 if (!rank) { 1409 MPI_Status status; 1410 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1411 for (i=1; i<size; i++) { 1412 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1413 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1414 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1415 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1416 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1417 } 1418 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1419 } else { 1420 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1421 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1422 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1423 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1424 } 1425 ierr = PetscFree(column_values);CHKERRQ(ierr); 1426 PetscFunctionReturn(0); 1427 } 1428 1429 #undef __FUNCT__ 1430 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1431 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1432 { 1433 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1434 PetscErrorCode ierr; 1435 PetscMPIInt rank = aij->rank,size = aij->size; 1436 PetscBool isdraw,iascii,isbinary; 1437 PetscViewer sviewer; 1438 PetscViewerFormat format; 1439 1440 PetscFunctionBegin; 1441 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1442 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1443 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1444 if (iascii) { 1445 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1446 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1447 MatInfo info; 1448 PetscBool inodes; 1449 1450 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1451 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1452 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1453 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1454 if (!inodes) { 1455 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1456 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1457 } else { 1458 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1459 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1460 } 1461 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1462 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1463 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1464 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1465 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1466 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1467 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1468 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1469 PetscFunctionReturn(0); 1470 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1471 PetscInt inodecount,inodelimit,*inodes; 1472 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1473 if (inodes) { 1474 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1475 } else { 1476 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1477 } 1478 PetscFunctionReturn(0); 1479 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1480 PetscFunctionReturn(0); 1481 } 1482 } else if (isbinary) { 1483 if (size == 1) { 1484 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1485 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1486 } else { 1487 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1488 } 1489 PetscFunctionReturn(0); 1490 } else if (isdraw) { 1491 PetscDraw draw; 1492 PetscBool isnull; 1493 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1494 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1495 } 1496 1497 if (size == 1) { 1498 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1499 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1500 } else { 1501 /* assemble the entire matrix onto first processor. 
*/ 1502 Mat A; 1503 Mat_SeqAIJ *Aloc; 1504 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1505 MatScalar *a; 1506 1507 if (mat->rmap->N > 1024) { 1508 PetscBool flg = PETSC_FALSE; 1509 1510 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1511 if (!flg) { 1512 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1513 } 1514 } 1515 1516 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1517 if (!rank) { 1518 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1519 } else { 1520 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1521 } 1522 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1523 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1524 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1525 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1526 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1527 1528 /* copy over the A part */ 1529 Aloc = (Mat_SeqAIJ*)aij->A->data; 1530 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1531 row = mat->rmap->rstart; 1532 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1533 for (i=0; i<m; i++) { 1534 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1535 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1536 } 1537 aj = Aloc->j; 1538 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1539 1540 /* copy over the B part */ 1541 Aloc = (Mat_SeqAIJ*)aij->B->data; 1542 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1543 row = mat->rmap->rstart; 1544 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1545 ct = cols; 1546 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1547 for (i=0; i<m; i++) { 1548 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1549 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1550 } 1551 ierr = PetscFree(ct);CHKERRQ(ierr); 1552 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1553 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1554 /* 1555 Everyone has to call to draw the matrix since the graphics waits are 1556 synchronized across all processors that share the PetscDraw object 1557 */ 1558 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1559 if (!rank) { 1560 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1561 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1562 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1563 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1564 } 1565 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1566 ierr = MatDestroy(&A);CHKERRQ(ierr); 1567 } 1568 PetscFunctionReturn(0); 1569 } 1570 1571 #undef __FUNCT__ 1572 #define __FUNCT__ "MatView_MPIAIJ" 1573 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1574 { 1575 PetscErrorCode ierr; 1576 PetscBool iascii,isdraw,issocket,isbinary; 1577 1578 PetscFunctionBegin; 1579 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1580 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1581 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1582 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1583 if (iascii || isdraw || isbinary || issocket) { 1584 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1585 } else { 1586 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1587 } 1588 PetscFunctionReturn(0); 1589 } 1590 1591 #undef __FUNCT__ 1592 #define __FUNCT__ "MatSOR_MPIAIJ" 1593 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1594 { 1595 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1596 PetscErrorCode ierr; 1597 Vec bb1 = 0; 1598 PetscBool hasop; 1599 1600 PetscFunctionBegin; 1601 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1602 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1603 } 1604 1605 if (flag == SOR_APPLY_UPPER) { 1606 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1607 PetscFunctionReturn(0); 1608 } 1609 1610 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1611 if (flag & SOR_ZERO_INITIAL_GUESS) { 1612 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1613 its--; 1614 } 1615 1616 while (its--) { 1617 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1618 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1619 1620 /* update rhs: bb1 = bb - B*x */ 1621 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1622 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1623 1624 /* local sweep */ 1625 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1626 } 1627 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1628 if (flag & SOR_ZERO_INITIAL_GUESS) { 1629 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1630 its--; 1631 } 1632 while (its--) { 1633 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1634 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1635 1636 /* update rhs: bb1 = bb - B*x */ 1637 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1638 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1639 1640 /* local sweep */ 1641 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1642 } 1643 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1644 if (flag & SOR_ZERO_INITIAL_GUESS) { 1645 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1646 its--; 1647 } 1648 while (its--) { 1649 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1650 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1651 1652 /* update rhs: bb1 = bb - B*x */ 1653 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1654 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1655 1656 /* local sweep */ 1657 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1658 } 1659 } else if (flag & SOR_EISENSTAT) { 1660 Vec xx1; 1661 1662 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1663 ierr = 
(*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1664 1665 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1666 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1667 if (!mat->diag) { 1668 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1669 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1670 } 1671 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1672 if (hasop) { 1673 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1674 } else { 1675 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1676 } 1677 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1678 1679 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1680 1681 /* local sweep */ 1682 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1683 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1684 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1685 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1686 1687 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1688 PetscFunctionReturn(0); 1689 } 1690 1691 #undef __FUNCT__ 1692 #define __FUNCT__ "MatPermute_MPIAIJ" 1693 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1694 { 1695 MPI_Comm comm; 1696 PetscInt first,local_rowsize,local_colsize; 1697 const PetscInt *rows; 1698 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1699 PetscErrorCode ierr; 1700 1701 PetscFunctionBegin; 1702 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1703 /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */ 1704 ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr); 1705 /* collect the global row permutation and invert it */ 1706 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1707 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1708 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 1709 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1710 ierr = ISDestroy(&growp);CHKERRQ(ierr); 1711 /* get the local target indices */ 1712 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1713 ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr); 1714 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1715 ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 1716 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1717 ierr = ISDestroy(&irowp);CHKERRQ(ierr); 1718 /* the column permutation is so much easier; 1719 make a local version of 'colp' and invert it */ 1720 ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr); 1721 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1722 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1723 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 1724 /* now we just get the submatrix */ 1725 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1726 /* clean up */ 1727 ierr = ISDestroy(&lrowp);CHKERRQ(ierr); 1728 ierr = ISDestroy(&icolp);CHKERRQ(ierr); 1729 PetscFunctionReturn(0); 1730 } 1731 1732 #undef __FUNCT__ 1733 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1734 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1735 { 1736 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1737 Mat A = mat->A,B = 
mat->B; 1738 PetscErrorCode ierr; 1739 PetscReal isend[5],irecv[5]; 1740 1741 PetscFunctionBegin; 1742 info->block_size = 1.0; 1743 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1744 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1745 isend[3] = info->memory; isend[4] = info->mallocs; 1746 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1747 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1748 isend[3] += info->memory; isend[4] += info->mallocs; 1749 if (flag == MAT_LOCAL) { 1750 info->nz_used = isend[0]; 1751 info->nz_allocated = isend[1]; 1752 info->nz_unneeded = isend[2]; 1753 info->memory = isend[3]; 1754 info->mallocs = isend[4]; 1755 } else if (flag == MAT_GLOBAL_MAX) { 1756 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1757 info->nz_used = irecv[0]; 1758 info->nz_allocated = irecv[1]; 1759 info->nz_unneeded = irecv[2]; 1760 info->memory = irecv[3]; 1761 info->mallocs = irecv[4]; 1762 } else if (flag == MAT_GLOBAL_SUM) { 1763 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1764 info->nz_used = irecv[0]; 1765 info->nz_allocated = irecv[1]; 1766 info->nz_unneeded = irecv[2]; 1767 info->memory = irecv[3]; 1768 info->mallocs = irecv[4]; 1769 } 1770 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1771 info->fill_ratio_needed = 0; 1772 info->factor_mallocs = 0; 1773 1774 PetscFunctionReturn(0); 1775 } 1776 1777 #undef __FUNCT__ 1778 #define __FUNCT__ "MatSetOption_MPIAIJ" 1779 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1780 { 1781 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1782 PetscErrorCode ierr; 1783 1784 PetscFunctionBegin; 1785 switch (op) { 1786 case MAT_NEW_NONZERO_LOCATIONS: 1787 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1788 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1789 case MAT_KEEP_NONZERO_PATTERN: 1790 case MAT_NEW_NONZERO_LOCATION_ERR: 1791 case MAT_USE_INODES: 1792 case MAT_IGNORE_ZERO_ENTRIES: 1793 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1794 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1795 break; 1796 case MAT_ROW_ORIENTED: 1797 a->roworiented = flg; 1798 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1799 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1800 break; 1801 case MAT_NEW_DIAGONALS: 1802 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1803 break; 1804 case MAT_IGNORE_OFF_PROC_ENTRIES: 1805 a->donotstash = flg; 1806 break; 1807 case MAT_SPD: 1808 A->spd_set = PETSC_TRUE; 1809 A->spd = flg; 1810 if (flg) { 1811 A->symmetric = PETSC_TRUE; 1812 A->structurally_symmetric = PETSC_TRUE; 1813 A->symmetric_set = PETSC_TRUE; 1814 A->structurally_symmetric_set = PETSC_TRUE; 1815 } 1816 break; 1817 case MAT_SYMMETRIC: 1818 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1819 break; 1820 case MAT_STRUCTURALLY_SYMMETRIC: 1821 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1822 break; 1823 case MAT_HERMITIAN: 1824 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1825 break; 1826 case MAT_SYMMETRY_ETERNAL: 1827 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1828 break; 1829 case DIAGBLOCK_MAT_CSR: 1830 case OFFDIAGBLOCK_MAT_CSR: 1831 case MAT_CSR: 1832 case DIAGBLOCK_MAT_DIA: 1833 case OFFDIAGBLOCK_MAT_DIA: 1834 case MAT_DIA: 1835 case DIAGBLOCK_MAT_ELL: 1836 case OFFDIAGBLOCK_MAT_ELL: 1837 case MAT_ELL: 1838 case DIAGBLOCK_MAT_HYB: 1839 case OFFDIAGBLOCK_MAT_HYB: 1840 case MAT_HYB: 1841 /* Not an error because 
MatSetOption_MPIAIJCUSP/CUSPARSE handle these options */ 1842 break; 1843 1844 default: 1845 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1846 } 1847 PetscFunctionReturn(0); 1848 } 1849 1850 #undef __FUNCT__ 1851 #define __FUNCT__ "MatGetRow_MPIAIJ" 1852 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1853 { 1854 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1855 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1856 PetscErrorCode ierr; 1857 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1858 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1859 PetscInt *cmap,*idx_p; 1860 1861 PetscFunctionBegin; 1862 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1863 mat->getrowactive = PETSC_TRUE; 1864 1865 if (!mat->rowvalues && (idx || v)) { 1866 /* 1867 allocate enough space to hold information from the longest row. 1868 */ 1869 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1870 PetscInt max = 1,tmp; 1871 for (i=0; i<matin->rmap->n; i++) { 1872 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1873 if (max < tmp) { max = tmp; } 1874 } 1875 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1876 } 1877 1878 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1879 lrow = row - rstart; 1880 1881 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1882 if (!v) {pvA = 0; pvB = 0;} 1883 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1884 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1885 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1886 nztot = nzA + nzB; 1887 1888 cmap = mat->garray; 1889 if (v || idx) { 1890 if (nztot) { 1891 /* Sort by increasing column numbers, assuming A and B already sorted */ 1892 PetscInt imark = -1; 1893 if (v) { 1894 *v = v_p = mat->rowvalues; 1895 for (i=0; i<nzB; i++) { 1896 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1897 else break; 1898 } 1899 imark = i; 1900 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1901 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1902 } 1903 if (idx) { 1904 *idx = idx_p = mat->rowindices; 1905 if (imark > -1) { 1906 for (i=0; i<imark; i++) { 1907 idx_p[i] = cmap[cworkB[i]]; 1908 } 1909 } else { 1910 for (i=0; i<nzB; i++) { 1911 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1912 else break; 1913 } 1914 imark = i; 1915 } 1916 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1917 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1918 } 1919 } else { 1920 if (idx) *idx = 0; 1921 if (v) *v = 0; 1922 } 1923 } 1924 *nz = nztot; 1925 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1926 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1927 PetscFunctionReturn(0); 1928 } 1929 1930 #undef __FUNCT__ 1931 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1932 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1933 { 1934 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1935 1936 PetscFunctionBegin; 1937 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1938 aij->getrowactive = PETSC_FALSE; 1939 PetscFunctionReturn(0); 1940 } 1941 1942 #undef __FUNCT__ 1943 #define __FUNCT__ "MatNorm_MPIAIJ" 1944 PetscErrorCode MatNorm_MPIAIJ(Mat 
mat,NormType type,PetscReal *norm) 1945 { 1946 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1947 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1948 PetscErrorCode ierr; 1949 PetscInt i,j,cstart = mat->cmap->rstart; 1950 PetscReal sum = 0.0; 1951 MatScalar *v; 1952 1953 PetscFunctionBegin; 1954 if (aij->size == 1) { 1955 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1956 } else { 1957 if (type == NORM_FROBENIUS) { 1958 v = amat->a; 1959 for (i=0; i<amat->nz; i++) { 1960 #if defined(PETSC_USE_COMPLEX) 1961 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1962 #else 1963 sum += (*v)*(*v); v++; 1964 #endif 1965 } 1966 v = bmat->a; 1967 for (i=0; i<bmat->nz; i++) { 1968 #if defined(PETSC_USE_COMPLEX) 1969 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1970 #else 1971 sum += (*v)*(*v); v++; 1972 #endif 1973 } 1974 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1975 *norm = PetscSqrtReal(*norm); 1976 } else if (type == NORM_1) { /* max column norm */ 1977 PetscReal *tmp,*tmp2; 1978 PetscInt *jj,*garray = aij->garray; 1979 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1980 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1981 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1982 *norm = 0.0; 1983 v = amat->a; jj = amat->j; 1984 for (j=0; j<amat->nz; j++) { 1985 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1986 } 1987 v = bmat->a; jj = bmat->j; 1988 for (j=0; j<bmat->nz; j++) { 1989 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1990 } 1991 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1992 for (j=0; j<mat->cmap->N; j++) { 1993 if (tmp2[j] > *norm) *norm = tmp2[j]; 1994 } 1995 ierr = PetscFree(tmp);CHKERRQ(ierr); 1996 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1997 } else if (type == NORM_INFINITY) { /* max row norm */ 1998 PetscReal ntemp = 0.0; 1999 for (j=0; j<aij->A->rmap->n; j++) { 2000 v = amat->a + amat->i[j]; 2001 sum = 0.0; 2002 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2003 sum += PetscAbsScalar(*v); v++; 2004 } 2005 v = bmat->a + bmat->i[j]; 2006 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2007 sum += PetscAbsScalar(*v); v++; 2008 } 2009 if (sum > ntemp) ntemp = sum; 2010 } 2011 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 2012 } else { 2013 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 2014 } 2015 } 2016 PetscFunctionReturn(0); 2017 } 2018 2019 #undef __FUNCT__ 2020 #define __FUNCT__ "MatTranspose_MPIAIJ" 2021 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2022 { 2023 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2024 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2025 PetscErrorCode ierr; 2026 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 2027 PetscInt cstart=A->cmap->rstart,ncol; 2028 Mat B; 2029 MatScalar *array; 2030 2031 PetscFunctionBegin; 2032 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2033 2034 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 2035 ai = Aloc->i; aj = Aloc->j; 2036 bi = Bloc->i; bj = Bloc->j; 2037 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2038 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 2039 ierr = 
PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2040 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 2041 for (i=0; i<ai[ma]; i++){ 2042 d_nnz[aj[i]] ++; 2043 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2044 } 2045 2046 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 2047 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2048 ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs); CHKERRQ(ierr); 2049 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2050 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 2051 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2052 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2053 } else { 2054 B = *matout; 2055 } 2056 2057 /* copy over the A part */ 2058 array = Aloc->a; 2059 row = A->rmap->rstart; 2060 for (i=0; i<ma; i++) { 2061 ncol = ai[i+1]-ai[i]; 2062 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2063 row++; array += ncol; aj += ncol; 2064 } 2065 aj = Aloc->j; 2066 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2067 2068 /* copy over the B part */ 2069 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2070 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2071 array = Bloc->a; 2072 row = A->rmap->rstart; 2073 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 2074 cols_tmp = cols; 2075 for (i=0; i<mb; i++) { 2076 ncol = bi[i+1]-bi[i]; 2077 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2078 row++; array += ncol; cols_tmp += ncol; 2079 } 2080 ierr = PetscFree(cols);CHKERRQ(ierr); 2081 2082 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2083 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2084 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2085 *matout = B; 2086 } else { 2087 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2088 } 2089 PetscFunctionReturn(0); 2090 } 2091 2092 #undef __FUNCT__ 2093 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2094 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2095 { 2096 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2097 Mat a = aij->A,b = aij->B; 2098 PetscErrorCode ierr; 2099 PetscInt s1,s2,s3; 2100 2101 PetscFunctionBegin; 2102 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2103 if (rr) { 2104 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2105 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2106 /* Overlap communication with computation. 
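         The forward scatter of rr into aij->lvec is started here, the blocks owned locally are scaled while those messages are in flight, and VecScatterEnd() below completes the scatter just before the off-diagonal block is right-scaled with the ghosted values.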
*/ 2107 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2108 } 2109 if (ll) { 2110 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2111 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2112 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2113 } 2114 /* scale the diagonal block */ 2115 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2116 2117 if (rr) { 2118 /* Do a scatter end and then right scale the off-diagonal block */ 2119 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2120 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2121 } 2122 2123 PetscFunctionReturn(0); 2124 } 2125 2126 #undef __FUNCT__ 2127 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2128 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2129 { 2130 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2131 PetscErrorCode ierr; 2132 2133 PetscFunctionBegin; 2134 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2135 PetscFunctionReturn(0); 2136 } 2137 2138 #undef __FUNCT__ 2139 #define __FUNCT__ "MatEqual_MPIAIJ" 2140 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2141 { 2142 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2143 Mat a,b,c,d; 2144 PetscBool flg; 2145 PetscErrorCode ierr; 2146 2147 PetscFunctionBegin; 2148 a = matA->A; b = matA->B; 2149 c = matB->A; d = matB->B; 2150 2151 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2152 if (flg) { 2153 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2154 } 2155 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2156 PetscFunctionReturn(0); 2157 } 2158 2159 #undef __FUNCT__ 2160 #define __FUNCT__ "MatCopy_MPIAIJ" 2161 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2162 { 2163 PetscErrorCode ierr; 2164 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2165 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2166 2167 PetscFunctionBegin; 2168 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2169 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2170 /* because of the column compression in the off-processor part of the matrix a->B, 2171 the number of columns in a->B and b->B may be different, hence we cannot call 2172 the MatCopy() directly on the two parts. 
If need be, we can provide a more 2173 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2174 then copying the submatrices */ 2175 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2176 } else { 2177 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2178 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2179 } 2180 PetscFunctionReturn(0); 2181 } 2182 2183 #undef __FUNCT__ 2184 #define __FUNCT__ "MatSetUp_MPIAIJ" 2185 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2186 { 2187 PetscErrorCode ierr; 2188 2189 PetscFunctionBegin; 2190 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2191 PetscFunctionReturn(0); 2192 } 2193 2194 #undef __FUNCT__ 2195 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2196 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2197 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt* nnz) 2198 { 2199 PetscInt i,m=Y->rmap->N; 2200 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2201 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2202 const PetscInt *xi = x->i,*yi = y->i; 2203 2204 PetscFunctionBegin; 2205 /* Set the number of nonzeros in the new matrix */ 2206 for(i=0; i<m; i++) { 2207 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2208 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2209 nnz[i] = 0; 2210 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2211 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2212 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2213 nnz[i]++; 2214 } 2215 for (; k<nzy; k++) nnz[i]++; 2216 } 2217 PetscFunctionReturn(0); 2218 } 2219 2220 #undef __FUNCT__ 2221 #define __FUNCT__ "MatAXPY_MPIAIJ" 2222 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2223 { 2224 PetscErrorCode ierr; 2225 PetscInt i; 2226 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 2227 PetscBLASInt bnz,one=1; 2228 Mat_SeqAIJ *x,*y; 2229 2230 PetscFunctionBegin; 2231 if (str == SAME_NONZERO_PATTERN) { 2232 PetscScalar alpha = a; 2233 x = (Mat_SeqAIJ *)xx->A->data; 2234 y = (Mat_SeqAIJ *)yy->A->data; 2235 bnz = PetscBLASIntCast(x->nz); 2236 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2237 x = (Mat_SeqAIJ *)xx->B->data; 2238 y = (Mat_SeqAIJ *)yy->B->data; 2239 bnz = PetscBLASIntCast(x->nz); 2240 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2241 } else if (str == SUBSET_NONZERO_PATTERN) { 2242 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2243 2244 x = (Mat_SeqAIJ *)xx->B->data; 2245 y = (Mat_SeqAIJ *)yy->B->data; 2246 if (y->xtoy && y->XtoY != xx->B) { 2247 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2248 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2249 } 2250 if (!y->xtoy) { /* get xtoy */ 2251 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2252 y->XtoY = xx->B; 2253 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2254 } 2255 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2256 } else { 2257 Mat B; 2258 PetscInt *nnz_d,*nnz_o; 2259 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2260 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2261 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 2262 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2263 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2264 ierr = 
MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr); 2265 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2266 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2267 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2268 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2269 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2270 ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 2271 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2272 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2273 } 2274 PetscFunctionReturn(0); 2275 } 2276 2277 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2278 2279 #undef __FUNCT__ 2280 #define __FUNCT__ "MatConjugate_MPIAIJ" 2281 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2282 { 2283 #if defined(PETSC_USE_COMPLEX) 2284 PetscErrorCode ierr; 2285 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2286 2287 PetscFunctionBegin; 2288 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2289 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2290 #else 2291 PetscFunctionBegin; 2292 #endif 2293 PetscFunctionReturn(0); 2294 } 2295 2296 #undef __FUNCT__ 2297 #define __FUNCT__ "MatRealPart_MPIAIJ" 2298 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2299 { 2300 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2301 PetscErrorCode ierr; 2302 2303 PetscFunctionBegin; 2304 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2305 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2306 PetscFunctionReturn(0); 2307 } 2308 2309 #undef __FUNCT__ 2310 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2311 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2312 { 2313 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2314 PetscErrorCode ierr; 2315 2316 PetscFunctionBegin; 2317 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2318 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2319 PetscFunctionReturn(0); 2320 } 2321 2322 #ifdef PETSC_HAVE_PBGL 2323 2324 #include <boost/parallel/mpi/bsp_process_group.hpp> 2325 #include <boost/graph/distributed/ilu_default_graph.hpp> 2326 #include <boost/graph/distributed/ilu_0_block.hpp> 2327 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2328 #include <boost/graph/distributed/petsc/interface.hpp> 2329 #include <boost/multi_array.hpp> 2330 #include <boost/parallel/distributed_property_map.hpp> 2331 2332 #undef __FUNCT__ 2333 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2334 /* 2335 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2336 */ 2337 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2338 { 2339 namespace petsc = boost::distributed::petsc; 2340 2341 namespace graph_dist = boost::graph::distributed; 2342 using boost::graph::distributed::ilu_default::process_group_type; 2343 using boost::graph::ilu_permuted; 2344 2345 PetscBool row_identity, col_identity; 2346 PetscContainer c; 2347 PetscInt m, n, M, N; 2348 PetscErrorCode ierr; 2349 2350 PetscFunctionBegin; 2351 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2352 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2353 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2354 if (!row_identity || !col_identity) { 2355 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2356 } 2357 2358 process_group_type pg; 2359 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2360 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg,
petsc::matrix_distribution(A, pg)); 2361 lgraph_type& level_graph = *lgraph_p; 2362 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2363 2364 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2365 ilu_permuted(level_graph); 2366 2367 /* put together the new matrix */ 2368 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 2369 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2370 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2371 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2372 ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs); CHKERRQ(ierr); 2373 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2374 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2375 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2376 2377 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 2378 ierr = PetscContainerSetPointer(c, lgraph_p); 2379 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2380 ierr = PetscContainerDestroy(&c); 2381 PetscFunctionReturn(0); 2382 } 2383 2384 #undef __FUNCT__ 2385 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2386 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2387 { 2388 PetscFunctionBegin; 2389 PetscFunctionReturn(0); 2390 } 2391 2392 #undef __FUNCT__ 2393 #define __FUNCT__ "MatSolve_MPIAIJ" 2394 /* 2395 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2396 */ 2397 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2398 { 2399 namespace graph_dist = boost::graph::distributed; 2400 2401 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2402 lgraph_type* lgraph_p; 2403 PetscContainer c; 2404 PetscErrorCode ierr; 2405 2406 PetscFunctionBegin; 2407 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 2408 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 2409 ierr = VecCopy(b, x);CHKERRQ(ierr); 2410 2411 PetscScalar* array_x; 2412 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2413 PetscInt sx; 2414 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2415 2416 PetscScalar* array_b; 2417 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2418 PetscInt sb; 2419 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2420 2421 lgraph_type& level_graph = *lgraph_p; 2422 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2423 2424 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2425 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2426 ref_x(array_x, boost::extents[num_vertices(graph)]); 2427 2428 typedef boost::iterator_property_map<array_ref_type::iterator, 2429 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2430 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2431 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2432 2433 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2434 2435 PetscFunctionReturn(0); 2436 } 2437 #endif 2438 2439 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2440 PetscInt nzlocal,nsends,nrecvs; 2441 PetscMPIInt *send_rank,*recv_rank; 2442 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2443 PetscScalar *sbuf_a,**rbuf_a; 2444 PetscErrorCode (*Destroy)(Mat); 2445 } Mat_Redundant; 2446 2447 #undef __FUNCT__ 2448 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2449 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2450 { 2451 PetscErrorCode ierr; 2452 Mat_Redundant 
*redund=(Mat_Redundant*)ptr; 2453 PetscInt i; 2454 2455 PetscFunctionBegin; 2456 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2457 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2458 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2459 for (i=0; i<redund->nrecvs; i++){ 2460 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2461 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2462 } 2463 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2464 ierr = PetscFree(redund);CHKERRQ(ierr); 2465 PetscFunctionReturn(0); 2466 } 2467 2468 #undef __FUNCT__ 2469 #define __FUNCT__ "MatDestroy_MatRedundant" 2470 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2471 { 2472 PetscErrorCode ierr; 2473 PetscContainer container; 2474 Mat_Redundant *redund=PETSC_NULL; 2475 2476 PetscFunctionBegin; 2477 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2478 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2479 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2480 A->ops->destroy = redund->Destroy; 2481 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2482 if (A->ops->destroy) { 2483 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2484 } 2485 PetscFunctionReturn(0); 2486 } 2487 2488 #undef __FUNCT__ 2489 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2490 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2491 { 2492 PetscMPIInt rank,size; 2493 MPI_Comm comm=((PetscObject)mat)->comm; 2494 PetscErrorCode ierr; 2495 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2496 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2497 PetscInt *rowrange=mat->rmap->range; 2498 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2499 Mat A=aij->A,B=aij->B,C=*matredundant; 2500 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2501 PetscScalar *sbuf_a; 2502 PetscInt nzlocal=a->nz+b->nz; 2503 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2504 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2505 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2506 MatScalar *aworkA,*aworkB; 2507 PetscScalar *vals; 2508 PetscMPIInt tag1,tag2,tag3,imdex; 2509 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2510 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2511 MPI_Status recv_status,*send_status; 2512 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2513 PetscInt **rbuf_j=PETSC_NULL; 2514 PetscScalar **rbuf_a=PETSC_NULL; 2515 Mat_Redundant *redund=PETSC_NULL; 2516 PetscContainer container; 2517 2518 PetscFunctionBegin; 2519 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2520 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2521 2522 if (reuse == MAT_REUSE_MATRIX) { 2523 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2524 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2525 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2526 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. 
Wrong local size"); 2527 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2528 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2529 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2530 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2531 2532 nsends = redund->nsends; 2533 nrecvs = redund->nrecvs; 2534 send_rank = redund->send_rank; 2535 recv_rank = redund->recv_rank; 2536 sbuf_nz = redund->sbuf_nz; 2537 rbuf_nz = redund->rbuf_nz; 2538 sbuf_j = redund->sbuf_j; 2539 sbuf_a = redund->sbuf_a; 2540 rbuf_j = redund->rbuf_j; 2541 rbuf_a = redund->rbuf_a; 2542 } 2543 2544 if (reuse == MAT_INITIAL_MATRIX){ 2545 PetscMPIInt subrank,subsize; 2546 PetscInt nleftover,np_subcomm; 2547 /* get the destination processors' id send_rank, nsends and nrecvs */ 2548 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2549 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2550 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2551 np_subcomm = size/nsubcomm; 2552 nleftover = size - nsubcomm*np_subcomm; 2553 nsends = 0; nrecvs = 0; 2554 for (i=0; i<size; i++){ /* i=rank*/ 2555 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2556 send_rank[nsends] = i; nsends++; 2557 recv_rank[nrecvs++] = i; 2558 } 2559 } 2560 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2561 i = size-nleftover-1; 2562 j = 0; 2563 while (j < nsubcomm - nleftover){ 2564 send_rank[nsends++] = i; 2565 i--; j++; 2566 } 2567 } 2568 2569 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2570 for (i=0; i<nleftover; i++){ 2571 recv_rank[nrecvs++] = size-nleftover+i; 2572 } 2573 } 2574 2575 /* allocate sbuf_j, sbuf_a */ 2576 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2577 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2578 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2579 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2580 2581 /* copy mat's local entries into the buffers */ 2582 if (reuse == MAT_INITIAL_MATRIX){ 2583 rownz_max = 0; 2584 rptr = sbuf_j; 2585 cols = sbuf_j + rend-rstart + 1; 2586 vals = sbuf_a; 2587 rptr[0] = 0; 2588 for (i=0; i<rend-rstart; i++){ 2589 row = i + rstart; 2590 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2591 ncols = nzA + nzB; 2592 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2593 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2594 /* load the column indices for this row into cols */ 2595 lwrite = 0; 2596 for (l=0; l<nzB; l++) { 2597 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2598 vals[lwrite] = aworkB[l]; 2599 cols[lwrite++] = ctmp; 2600 } 2601 } 2602 for (l=0; l<nzA; l++){ 2603 vals[lwrite] = aworkA[l]; 2604 cols[lwrite++] = cstart + cworkA[l]; 2605 } 2606 for (l=0; l<nzB; l++) { 2607 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2608 vals[lwrite] = aworkB[l]; 2609 cols[lwrite++] = ctmp; 2610 } 2611 } 2612 vals += ncols; 2613 cols += ncols; 2614 rptr[i+1] = rptr[i] + ncols; 2615 if (rownz_max < ncols) rownz_max = ncols; 2616 } 2617 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2618 } else { /* only copy matrix values into sbuf_a */ 2619 rptr = sbuf_j; 2620 vals = sbuf_a; 2621 rptr[0] = 0; 2622 for (i=0; i<rend-rstart; i++){ 2623 row = i + 
rstart; 2624 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2625 ncols = nzA + nzB; 2626 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2627 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2628 lwrite = 0; 2629 for (l=0; l<nzB; l++) { 2630 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2631 } 2632 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2633 for (l=0; l<nzB; l++) { 2634 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2635 } 2636 vals += ncols; 2637 rptr[i+1] = rptr[i] + ncols; 2638 } 2639 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2640 2641 /* send nzlocal to others, and recv other's nzlocal */ 2642 /*--------------------------------------------------*/ 2643 if (reuse == MAT_INITIAL_MATRIX){ 2644 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2645 s_waits2 = s_waits3 + nsends; 2646 s_waits1 = s_waits2 + nsends; 2647 r_waits1 = s_waits1 + nsends; 2648 r_waits2 = r_waits1 + nrecvs; 2649 r_waits3 = r_waits2 + nrecvs; 2650 } else { 2651 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2652 r_waits3 = s_waits3 + nsends; 2653 } 2654 2655 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2656 if (reuse == MAT_INITIAL_MATRIX){ 2657 /* get new tags to keep the communication clean */ 2658 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2659 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2660 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2661 2662 /* post receives of other's nzlocal */ 2663 for (i=0; i<nrecvs; i++){ 2664 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2665 } 2666 /* send nzlocal to others */ 2667 for (i=0; i<nsends; i++){ 2668 sbuf_nz[i] = nzlocal; 2669 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2670 } 2671 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2672 count = nrecvs; 2673 while (count) { 2674 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2675 recv_rank[imdex] = recv_status.MPI_SOURCE; 2676 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2677 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2678 2679 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2680 rbuf_nz[imdex] += i + 2; 2681 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2682 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2683 count--; 2684 } 2685 /* wait on sends of nzlocal */ 2686 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2687 /* send mat->i,j to others, and recv from other's */ 2688 /*------------------------------------------------*/ 2689 for (i=0; i<nsends; i++){ 2690 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2691 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2692 } 2693 /* wait on receives of mat->i,j */ 2694 /*------------------------------*/ 2695 count = nrecvs; 2696 while (count) { 2697 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2698 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank 
%d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2699 count--; 2700 } 2701 /* wait on sends of mat->i,j */ 2702 /*---------------------------*/ 2703 if (nsends) { 2704 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2705 } 2706 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2707 2708 /* post receives, send and receive mat->a */ 2709 /*----------------------------------------*/ 2710 for (imdex=0; imdex<nrecvs; imdex++) { 2711 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2712 } 2713 for (i=0; i<nsends; i++){ 2714 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2715 } 2716 count = nrecvs; 2717 while (count) { 2718 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2719 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2720 count--; 2721 } 2722 if (nsends) { 2723 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2724 } 2725 2726 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2727 2728 /* create redundant matrix */ 2729 /*-------------------------*/ 2730 if (reuse == MAT_INITIAL_MATRIX){ 2731 /* compute rownz_max for preallocation */ 2732 for (imdex=0; imdex<nrecvs; imdex++){ 2733 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2734 rptr = rbuf_j[imdex]; 2735 for (i=0; i<j; i++){ 2736 ncols = rptr[i+1] - rptr[i]; 2737 if (rownz_max < ncols) rownz_max = ncols; 2738 } 2739 } 2740 2741 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2742 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2743 ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs); CHKERRQ(ierr); 2744 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2745 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2746 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2747 } else { 2748 C = *matredundant; 2749 } 2750 2751 /* insert local matrix entries */ 2752 rptr = sbuf_j; 2753 cols = sbuf_j + rend-rstart + 1; 2754 vals = sbuf_a; 2755 for (i=0; i<rend-rstart; i++){ 2756 row = i + rstart; 2757 ncols = rptr[i+1] - rptr[i]; 2758 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2759 vals += ncols; 2760 cols += ncols; 2761 } 2762 /* insert received matrix entries */ 2763 for (imdex=0; imdex<nrecvs; imdex++){ 2764 rstart = rowrange[recv_rank[imdex]]; 2765 rend = rowrange[recv_rank[imdex]+1]; 2766 rptr = rbuf_j[imdex]; 2767 cols = rbuf_j[imdex] + rend-rstart + 1; 2768 vals = rbuf_a[imdex]; 2769 for (i=0; i<rend-rstart; i++){ 2770 row = i + rstart; 2771 ncols = rptr[i+1] - rptr[i]; 2772 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2773 vals += ncols; 2774 cols += ncols; 2775 } 2776 } 2777 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2778 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2779 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2780 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2781 if (reuse == MAT_INITIAL_MATRIX) { 2782 PetscContainer container; 2783 *matredundant = C; 2784 /* create a supporting struct and attach it to C for reuse */ 2785 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2786 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 
2787 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2788 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2789 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2790 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 2791 2792 redund->nzlocal = nzlocal; 2793 redund->nsends = nsends; 2794 redund->nrecvs = nrecvs; 2795 redund->send_rank = send_rank; 2796 redund->recv_rank = recv_rank; 2797 redund->sbuf_nz = sbuf_nz; 2798 redund->rbuf_nz = rbuf_nz; 2799 redund->sbuf_j = sbuf_j; 2800 redund->sbuf_a = sbuf_a; 2801 redund->rbuf_j = rbuf_j; 2802 redund->rbuf_a = rbuf_a; 2803 2804 redund->Destroy = C->ops->destroy; 2805 C->ops->destroy = MatDestroy_MatRedundant; 2806 } 2807 PetscFunctionReturn(0); 2808 } 2809 2810 #undef __FUNCT__ 2811 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2812 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2813 { 2814 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2815 PetscErrorCode ierr; 2816 PetscInt i,*idxb = 0; 2817 PetscScalar *va,*vb; 2818 Vec vtmp; 2819 2820 PetscFunctionBegin; 2821 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2822 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2823 if (idx) { 2824 for (i=0; i<A->rmap->n; i++) { 2825 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2826 } 2827 } 2828 2829 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2830 if (idx) { 2831 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2832 } 2833 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2834 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2835 2836 for (i=0; i<A->rmap->n; i++){ 2837 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2838 va[i] = vb[i]; 2839 if (idx) idx[i] = a->garray[idxb[i]]; 2840 } 2841 } 2842 2843 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2844 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2845 ierr = PetscFree(idxb);CHKERRQ(ierr); 2846 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2847 PetscFunctionReturn(0); 2848 } 2849 2850 #undef __FUNCT__ 2851 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2852 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2853 { 2854 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2855 PetscErrorCode ierr; 2856 PetscInt i,*idxb = 0; 2857 PetscScalar *va,*vb; 2858 Vec vtmp; 2859 2860 PetscFunctionBegin; 2861 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2862 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2863 if (idx) { 2864 for (i=0; i<A->rmap->n; i++) { 2865 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2866 } 2867 } 2868 2869 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2870 if (idx) { 2871 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2872 } 2873 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2874 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2875 2876 for (i=0; i<A->rmap->n; i++){ 2877 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2878 va[i] = vb[i]; 2879 if (idx) idx[i] = a->garray[idxb[i]]; 2880 } 2881 } 2882 2883 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2884 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2885 ierr = PetscFree(idxb);CHKERRQ(ierr); 2886 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2887 PetscFunctionReturn(0); 2888 } 2889 2890 #undef __FUNCT__ 2891 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2892 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2893 { 2894 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2895 PetscInt n = A->rmap->n; 2896 PetscInt cstart =
A->cmap->rstart; 2897 PetscInt *cmap = mat->garray; 2898 PetscInt *diagIdx, *offdiagIdx; 2899 Vec diagV, offdiagV; 2900 PetscScalar *a, *diagA, *offdiagA; 2901 PetscInt r; 2902 PetscErrorCode ierr; 2903 2904 PetscFunctionBegin; 2905 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2906 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2907 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2908 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2909 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2910 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2911 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2912 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2913 for(r = 0; r < n; ++r) { 2914 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2915 a[r] = diagA[r]; 2916 idx[r] = cstart + diagIdx[r]; 2917 } else { 2918 a[r] = offdiagA[r]; 2919 idx[r] = cmap[offdiagIdx[r]]; 2920 } 2921 } 2922 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2923 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2924 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2925 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2926 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2927 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2928 PetscFunctionReturn(0); 2929 } 2930 2931 #undef __FUNCT__ 2932 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2933 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2934 { 2935 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2936 PetscInt n = A->rmap->n; 2937 PetscInt cstart = A->cmap->rstart; 2938 PetscInt *cmap = mat->garray; 2939 PetscInt *diagIdx, *offdiagIdx; 2940 Vec diagV, offdiagV; 2941 PetscScalar *a, *diagA, *offdiagA; 2942 PetscInt r; 2943 PetscErrorCode ierr; 2944 2945 PetscFunctionBegin; 2946 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2947 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2948 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2949 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2950 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2951 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2952 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2953 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2954 for(r = 0; r < n; ++r) { 2955 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2956 a[r] = diagA[r]; 2957 idx[r] = cstart + diagIdx[r]; 2958 } else { 2959 a[r] = offdiagA[r]; 2960 idx[r] = cmap[offdiagIdx[r]]; 2961 } 2962 } 2963 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2964 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2965 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2966 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2967 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2968 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2969 PetscFunctionReturn(0); 2970 } 2971 2972 #undef __FUNCT__ 2973 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 2974 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2975 { 2976 PetscErrorCode ierr; 2977 Mat *dummy; 2978 2979 PetscFunctionBegin; 2980 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2981 *newmat = *dummy; 2982 ierr = PetscFree(dummy);CHKERRQ(ierr); 2983 PetscFunctionReturn(0); 2984 } 2985 2986 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 2987 2988 #undef __FUNCT__ 
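/*
   Illustrative sketch (not part of the original source): MatInvertBlockDiagonal_MPIAIJ below simply
   delegates to the sequential diagonal block a->A, so no communication is needed. A caller such as a
   point-block Jacobi smoother might use it roughly as follows; the variable names are hypothetical and
   the exact public signature should be checked against petscmat.h.

     PetscScalar *idiag;
     ierr = MatInvertBlockDiagonal(A,&idiag);CHKERRQ(ierr);
     // idiag now holds the inverted diagonal blocks of the locally owned rows, bs*bs values per block
*/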
2989 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 2990 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,PetscScalar **values) 2991 { 2992 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2993 PetscErrorCode ierr; 2994 2995 PetscFunctionBegin; 2996 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2997 PetscFunctionReturn(0); 2998 } 2999 3000 3001 /* -------------------------------------------------------------------*/ 3002 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3003 MatGetRow_MPIAIJ, 3004 MatRestoreRow_MPIAIJ, 3005 MatMult_MPIAIJ, 3006 /* 4*/ MatMultAdd_MPIAIJ, 3007 MatMultTranspose_MPIAIJ, 3008 MatMultTransposeAdd_MPIAIJ, 3009 #ifdef PETSC_HAVE_PBGL 3010 MatSolve_MPIAIJ, 3011 #else 3012 0, 3013 #endif 3014 0, 3015 0, 3016 /*10*/ 0, 3017 0, 3018 0, 3019 MatSOR_MPIAIJ, 3020 MatTranspose_MPIAIJ, 3021 /*15*/ MatGetInfo_MPIAIJ, 3022 MatEqual_MPIAIJ, 3023 MatGetDiagonal_MPIAIJ, 3024 MatDiagonalScale_MPIAIJ, 3025 MatNorm_MPIAIJ, 3026 /*20*/ MatAssemblyBegin_MPIAIJ, 3027 MatAssemblyEnd_MPIAIJ, 3028 MatSetOption_MPIAIJ, 3029 MatZeroEntries_MPIAIJ, 3030 /*24*/ MatZeroRows_MPIAIJ, 3031 0, 3032 #ifdef PETSC_HAVE_PBGL 3033 0, 3034 #else 3035 0, 3036 #endif 3037 0, 3038 0, 3039 /*29*/ MatSetUp_MPIAIJ, 3040 #ifdef PETSC_HAVE_PBGL 3041 0, 3042 #else 3043 0, 3044 #endif 3045 0, 3046 0, 3047 0, 3048 /*34*/ MatDuplicate_MPIAIJ, 3049 0, 3050 0, 3051 0, 3052 0, 3053 /*39*/ MatAXPY_MPIAIJ, 3054 MatGetSubMatrices_MPIAIJ, 3055 MatIncreaseOverlap_MPIAIJ, 3056 MatGetValues_MPIAIJ, 3057 MatCopy_MPIAIJ, 3058 /*44*/ MatGetRowMax_MPIAIJ, 3059 MatScale_MPIAIJ, 3060 0, 3061 0, 3062 MatZeroRowsColumns_MPIAIJ, 3063 /*49*/ 0, 3064 0, 3065 0, 3066 0, 3067 0, 3068 /*54*/ MatFDColoringCreate_MPIAIJ, 3069 0, 3070 MatSetUnfactored_MPIAIJ, 3071 MatPermute_MPIAIJ, 3072 0, 3073 /*59*/ MatGetSubMatrix_MPIAIJ, 3074 MatDestroy_MPIAIJ, 3075 MatView_MPIAIJ, 3076 0, 3077 0, 3078 /*64*/ 0, 3079 0, 3080 0, 3081 0, 3082 0, 3083 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3084 MatGetRowMinAbs_MPIAIJ, 3085 0, 3086 MatSetColoring_MPIAIJ, 3087 #if defined(PETSC_HAVE_ADIC) 3088 MatSetValuesAdic_MPIAIJ, 3089 #else 3090 0, 3091 #endif 3092 MatSetValuesAdifor_MPIAIJ, 3093 /*75*/ MatFDColoringApply_AIJ, 3094 0, 3095 0, 3096 0, 3097 0, 3098 /*80*/ 0, 3099 0, 3100 0, 3101 /*83*/ MatLoad_MPIAIJ, 3102 0, 3103 0, 3104 0, 3105 0, 3106 0, 3107 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3108 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3109 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3110 MatPtAP_Basic, 3111 MatPtAPSymbolic_MPIAIJ, 3112 /*94*/ MatPtAPNumeric_MPIAIJ, 3113 0, 3114 0, 3115 0, 3116 0, 3117 /*99*/ 0, 3118 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3119 MatPtAPNumeric_MPIAIJ_MPIAIJ, 3120 MatConjugate_MPIAIJ, 3121 0, 3122 /*104*/MatSetValuesRow_MPIAIJ, 3123 MatRealPart_MPIAIJ, 3124 MatImaginaryPart_MPIAIJ, 3125 0, 3126 0, 3127 /*109*/0, 3128 MatGetRedundantMatrix_MPIAIJ, 3129 MatGetRowMin_MPIAIJ, 3130 0, 3131 0, 3132 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3133 0, 3134 0, 3135 0, 3136 0, 3137 /*119*/0, 3138 0, 3139 0, 3140 0, 3141 MatGetMultiProcBlock_MPIAIJ, 3142 /*124*/MatFindNonZeroRows_MPIAIJ, 3143 MatGetColumnNorms_MPIAIJ, 3144 MatInvertBlockDiagonal_MPIAIJ, 3145 0, 3146 MatGetSubMatricesParallel_MPIAIJ, 3147 /*129*/0, 3148 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3149 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3150 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3151 0, 3152 /*134*/0, 3153 0, 3154 0, 3155 0, 3156 0 3157 }; 3158 3159 /* ----------------------------------------------------------------------------------------*/ 3160 3161 EXTERN_C_BEGIN 3162 #undef __FUNCT__ 3163 
#define __FUNCT__ "MatStoreValues_MPIAIJ" 3164 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3165 { 3166 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3167 PetscErrorCode ierr; 3168 3169 PetscFunctionBegin; 3170 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3171 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3172 PetscFunctionReturn(0); 3173 } 3174 EXTERN_C_END 3175 3176 EXTERN_C_BEGIN 3177 #undef __FUNCT__ 3178 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3179 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3180 { 3181 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3182 PetscErrorCode ierr; 3183 3184 PetscFunctionBegin; 3185 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3186 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3187 PetscFunctionReturn(0); 3188 } 3189 EXTERN_C_END 3190 3191 EXTERN_C_BEGIN 3192 #undef __FUNCT__ 3193 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3194 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3195 { 3196 Mat_MPIAIJ *b; 3197 PetscErrorCode ierr; 3198 PetscInt i; 3199 PetscBool d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE; 3200 3201 PetscFunctionBegin; 3202 if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE; 3203 if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE; 3204 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3205 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3206 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3207 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3208 3209 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3210 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3211 if (d_nnz) { 3212 for (i=0; i<B->rmap->n; i++) { 3213 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 3214 } 3215 } 3216 if (o_nnz) { 3217 for (i=0; i<B->rmap->n; i++) { 3218 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3219 } 3220 } 3221 b = (Mat_MPIAIJ*)B->data; 3222 3223 if (!B->preallocated) { 3224 /* Explicitly create 2 MATSEQAIJ matrices. */ 3225 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3226 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3227 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3228 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3229 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3230 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3231 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3232 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3233 } 3234 3235 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3236 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3237 /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). 
*/ 3238 if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3239 if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3240 B->preallocated = PETSC_TRUE; 3241 PetscFunctionReturn(0); 3242 } 3243 EXTERN_C_END 3244 3245 #undef __FUNCT__ 3246 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3247 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3248 { 3249 Mat mat; 3250 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3251 PetscErrorCode ierr; 3252 3253 PetscFunctionBegin; 3254 *newmat = 0; 3255 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3256 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3257 ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr); 3258 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3259 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3260 a = (Mat_MPIAIJ*)mat->data; 3261 3262 mat->factortype = matin->factortype; 3263 mat->rmap->bs = matin->rmap->bs; 3264 mat->cmap->bs = matin->cmap->bs; 3265 mat->assembled = PETSC_TRUE; 3266 mat->insertmode = NOT_SET_VALUES; 3267 mat->preallocated = PETSC_TRUE; 3268 3269 a->size = oldmat->size; 3270 a->rank = oldmat->rank; 3271 a->donotstash = oldmat->donotstash; 3272 a->roworiented = oldmat->roworiented; 3273 a->rowindices = 0; 3274 a->rowvalues = 0; 3275 a->getrowactive = PETSC_FALSE; 3276 3277 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3278 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3279 3280 if (oldmat->colmap) { 3281 #if defined (PETSC_USE_CTABLE) 3282 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3283 #else 3284 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3285 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3286 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3287 #endif 3288 } else a->colmap = 0; 3289 if (oldmat->garray) { 3290 PetscInt len; 3291 len = oldmat->B->cmap->n; 3292 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3293 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3294 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3295 } else a->garray = 0; 3296 3297 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3298 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3299 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3300 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3301 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3302 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3303 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3304 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3305 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3306 *newmat = mat; 3307 PetscFunctionReturn(0); 3308 } 3309 3310 3311 3312 #undef __FUNCT__ 3313 #define __FUNCT__ "MatLoad_MPIAIJ" 3314 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3315 { 3316 PetscScalar *vals,*svals; 3317 MPI_Comm comm = ((PetscObject)viewer)->comm; 3318 PetscErrorCode ierr; 3319 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3320 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3321 PetscInt 
header[4],*rowlengths = 0,M,N,m,*cols; 3322 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 3323 PetscInt cend,cstart,n,*rowners,sizesset=1; 3324 int fd; 3325 3326 PetscFunctionBegin; 3327 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3328 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3329 if (!rank) { 3330 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3331 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3332 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3333 } 3334 3335 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3336 3337 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3338 M = header[1]; N = header[2]; 3339 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3340 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3341 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3342 3343 /* If global sizes are set, check if they are consistent with that given in the file */ 3344 if (sizesset) { 3345 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3346 } 3347 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3348 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3349 3350 /* determine ownership of all rows */ 3351 if (newMat->rmap->n < 0 ) m = M/size + ((M % size) > rank); /* PETSC_DECIDE */ 3352 else m = newMat->rmap->n; /* Set by user */ 3353 3354 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3355 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3356 3357 /* First process needs enough room for process with most rows */ 3358 if (!rank) { 3359 mmax = rowners[1]; 3360 for (i=2; i<size; i++) { 3361 mmax = PetscMax(mmax,rowners[i]); 3362 } 3363 } else mmax = m; 3364 3365 rowners[0] = 0; 3366 for (i=2; i<=size; i++) { 3367 rowners[i] += rowners[i-1]; 3368 } 3369 rstart = rowners[rank]; 3370 rend = rowners[rank+1]; 3371 3372 /* distribute row lengths to all processors */ 3373 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 3374 if (!rank) { 3375 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3376 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3377 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3378 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3379 for (j=0; j<m; j++) { 3380 procsnz[0] += ourlens[j]; 3381 } 3382 for (i=1; i<size; i++) { 3383 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3384 /* calculate the number of nonzeros on each processor */ 3385 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3386 procsnz[i] += rowlengths[j]; 3387 } 3388 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3389 } 3390 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3391 } else { 3392 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3393 } 3394 3395 if (!rank) { 3396 /* determine max buffer needed and allocate it */ 3397 maxnz = 0; 3398 for (i=0; i<size; i++) { 3399 maxnz = PetscMax(maxnz,procsnz[i]); 3400 } 3401 ierr = 
PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3402 3403 /* read in my part of the matrix column indices */ 3404 nz = procsnz[0]; 3405 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3406 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3407 3408 /* read in every one elses and ship off */ 3409 for (i=1; i<size; i++) { 3410 nz = procsnz[i]; 3411 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3412 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3413 } 3414 ierr = PetscFree(cols);CHKERRQ(ierr); 3415 } else { 3416 /* determine buffer space needed for message */ 3417 nz = 0; 3418 for (i=0; i<m; i++) { 3419 nz += ourlens[i]; 3420 } 3421 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3422 3423 /* receive message of column indices*/ 3424 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3425 } 3426 3427 /* determine column ownership if matrix is not square */ 3428 if (N != M) { 3429 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3430 else n = newMat->cmap->n; 3431 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3432 cstart = cend - n; 3433 } else { 3434 cstart = rstart; 3435 cend = rend; 3436 n = cend - cstart; 3437 } 3438 3439 /* loop over local rows, determining number of off diagonal entries */ 3440 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3441 jj = 0; 3442 for (i=0; i<m; i++) { 3443 for (j=0; j<ourlens[i]; j++) { 3444 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3445 jj++; 3446 } 3447 } 3448 3449 for (i=0; i<m; i++) { 3450 ourlens[i] -= offlens[i]; 3451 } 3452 if (!sizesset) { 3453 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3454 } 3455 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3456 3457 for (i=0; i<m; i++) { 3458 ourlens[i] += offlens[i]; 3459 } 3460 3461 if (!rank) { 3462 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3463 3464 /* read in my part of the matrix numerical values */ 3465 nz = procsnz[0]; 3466 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3467 3468 /* insert into matrix */ 3469 jj = rstart; 3470 smycols = mycols; 3471 svals = vals; 3472 for (i=0; i<m; i++) { 3473 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3474 smycols += ourlens[i]; 3475 svals += ourlens[i]; 3476 jj++; 3477 } 3478 3479 /* read in other processors and ship out */ 3480 for (i=1; i<size; i++) { 3481 nz = procsnz[i]; 3482 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3483 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3484 } 3485 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3486 } else { 3487 /* receive numeric values */ 3488 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3489 3490 /* receive message of values*/ 3491 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3492 3493 /* insert into matrix */ 3494 jj = rstart; 3495 smycols = mycols; 3496 svals = vals; 3497 for (i=0; i<m; i++) { 3498 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3499 smycols += ourlens[i]; 3500 svals += ourlens[i]; 3501 jj++; 3502 } 3503 } 3504 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3505 ierr = PetscFree(vals);CHKERRQ(ierr); 3506 ierr = PetscFree(mycols);CHKERRQ(ierr); 3507 ierr = PetscFree(rowners);CHKERRQ(ierr); 3508 ierr = 
MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3509 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3510 PetscFunctionReturn(0); 3511 } 3512 3513 #undef __FUNCT__ 3514 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3515 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3516 { 3517 PetscErrorCode ierr; 3518 IS iscol_local; 3519 PetscInt csize; 3520 3521 PetscFunctionBegin; 3522 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3523 if (call == MAT_REUSE_MATRIX) { 3524 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3525 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3526 } else { 3527 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3528 } 3529 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3530 if (call == MAT_INITIAL_MATRIX) { 3531 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3532 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3533 } 3534 PetscFunctionReturn(0); 3535 } 3536 3537 #undef __FUNCT__ 3538 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3539 /* 3540 Not great since it makes two copies of the submatrix, first an SeqAIJ 3541 in local and then by concatenating the local matrices the end result. 3542 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3543 3544 Note: This requires a sequential iscol with all indices. 3545 */ 3546 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3547 { 3548 PetscErrorCode ierr; 3549 PetscMPIInt rank,size; 3550 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3551 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3552 Mat *local,M,Mreuse; 3553 MatScalar *vwork,*aa; 3554 MPI_Comm comm = ((PetscObject)mat)->comm; 3555 Mat_SeqAIJ *aij; 3556 3557 3558 PetscFunctionBegin; 3559 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3560 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3561 3562 if (call == MAT_REUSE_MATRIX) { 3563 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3564 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3565 local = &Mreuse; 3566 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr); 3567 } else { 3568 ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr); 3569 Mreuse = *local; 3570 ierr = PetscFree(local);CHKERRQ(ierr); 3571 } 3572 3573 /* 3574 m - number of local rows 3575 n - number of columns (same on all processors) 3576 rstart - first row in new global matrix generated 3577 */ 3578 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3579 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3580 if (call == MAT_INITIAL_MATRIX) { 3581 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3582 ii = aij->i; 3583 jj = aij->j; 3584 3585 /* 3586 Determine the number of non-zeros in the diagonal and off-diagonal 3587 portions of the matrix in order to do correct preallocation 3588 */ 3589 3590 /* first get start and end of "diagonal" columns */ 3591 if (csize == PETSC_DECIDE) { 3592 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3593 if (mglobal == n) { /* square matrix */ 3594 nlocal = m; 3595 } else { 3596 nlocal = n/size + ((n % size) > rank); 3597 } 3598 } else { 3599 nlocal = 
csize; 3600 } 3601 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3602 rstart = rend - nlocal; 3603 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3604 3605 /* next, compute all the lengths */ 3606 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3607 olens = dlens + m; 3608 for (i=0; i<m; i++) { 3609 jend = ii[i+1] - ii[i]; 3610 olen = 0; 3611 dlen = 0; 3612 for (j=0; j<jend; j++) { 3613 if (*jj < rstart || *jj >= rend) olen++; 3614 else dlen++; 3615 jj++; 3616 } 3617 olens[i] = olen; 3618 dlens[i] = dlen; 3619 } 3620 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3621 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3622 ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); 3623 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3624 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3625 ierr = PetscFree(dlens);CHKERRQ(ierr); 3626 } else { 3627 PetscInt ml,nl; 3628 3629 M = *newmat; 3630 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3631 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3632 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3633 /* 3634 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3635 rather than the slower MatSetValues(). 3636 */ 3637 M->was_assembled = PETSC_TRUE; 3638 M->assembled = PETSC_FALSE; 3639 } 3640 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3641 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3642 ii = aij->i; 3643 jj = aij->j; 3644 aa = aij->a; 3645 for (i=0; i<m; i++) { 3646 row = rstart + i; 3647 nz = ii[i+1] - ii[i]; 3648 cwork = jj; jj += nz; 3649 vwork = aa; aa += nz; 3650 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3651 } 3652 3653 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3654 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3655 *newmat = M; 3656 3657 /* save submatrix used in processor for next request */ 3658 if (call == MAT_INITIAL_MATRIX) { 3659 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3660 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3661 } 3662 3663 PetscFunctionReturn(0); 3664 } 3665 3666 EXTERN_C_BEGIN 3667 #undef __FUNCT__ 3668 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3669 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3670 { 3671 PetscInt m,cstart, cend,j,nnz,i,d; 3672 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3673 const PetscInt *JJ; 3674 PetscScalar *values; 3675 PetscErrorCode ierr; 3676 3677 PetscFunctionBegin; 3678 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3679 3680 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3681 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3682 m = B->rmap->n; 3683 cstart = B->cmap->rstart; 3684 cend = B->cmap->rend; 3685 rstart = B->rmap->rstart; 3686 3687 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3688 3689 #if defined(PETSC_USE_DEBUGGING) 3690 for (i=0; i<m; i++) { 3691 nnz = Ii[i+1]- Ii[i]; 3692 JJ = J + Ii[i]; 3693 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3694 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 3695 
if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3696 }
3697 #endif
3698
3699 for (i=0; i<m; i++) {
3700 nnz = Ii[i+1]- Ii[i];
3701 JJ = J + Ii[i];
3702 nnz_max = PetscMax(nnz_max,nnz);
3703 d = 0;
3704 for (j=0; j<nnz; j++) {
3705 if (cstart <= JJ[j] && JJ[j] < cend) d++;
3706 }
3707 d_nnz[i] = d;
3708 o_nnz[i] = nnz - d;
3709 }
3710 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3711 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3712
3713 if (v) values = (PetscScalar*)v;
3714 else {
3715 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3716 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3717 }
3718
3719 for (i=0; i<m; i++) {
3720 ii = i + rstart;
3721 nnz = Ii[i+1]- Ii[i];
3722 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3723 }
3724 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3725 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3726
3727 if (!v) {
3728 ierr = PetscFree(values);CHKERRQ(ierr);
3729 }
3730 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3731 PetscFunctionReturn(0);
3732 }
3733 EXTERN_C_END
3734
3735 #undef __FUNCT__
3736 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3737 /*@
3738 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3739 (the default parallel PETSc format).
3740
3741 Collective on MPI_Comm
3742
3743 Input Parameters:
3744 + B - the matrix
3745 . i - the indices into j for the start of each local row (starts with zero)
3746 . j - the column indices for each local row (starts with zero)
3747 - v - optional values in the matrix
3748
3749 Level: developer
3750
3751 Notes:
3752 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3753 thus you CANNOT change the matrix entries by changing the values of v[] after you have
3754 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3755
3756 The i and j indices are 0 based, and the i indices are offsets into the local j array.
3757
3758 The format used for the sparse matrix input is equivalent to a
3759 row-major ordering,
i.e for the following matrix, the input data expected is 3760 as shown: 3761 3762 1 0 0 3763 2 0 3 P0 3764 ------- 3765 4 5 6 P1 3766 3767 Process0 [P0]: rows_owned=[0,1] 3768 i = {0,1,3} [size = nrow+1 = 2+1] 3769 j = {0,0,2} [size = nz = 6] 3770 v = {1,2,3} [size = nz = 6] 3771 3772 Process1 [P1]: rows_owned=[2] 3773 i = {0,3} [size = nrow+1 = 1+1] 3774 j = {0,1,2} [size = nz = 6] 3775 v = {4,5,6} [size = nz = 6] 3776 3777 .keywords: matrix, aij, compressed row, sparse, parallel 3778 3779 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3780 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3781 @*/ 3782 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3783 { 3784 PetscErrorCode ierr; 3785 3786 PetscFunctionBegin; 3787 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3788 PetscFunctionReturn(0); 3789 } 3790 3791 #undef __FUNCT__ 3792 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3793 /*@C 3794 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3795 (the default parallel PETSc format). For good matrix assembly performance 3796 the user should preallocate the matrix storage by setting the parameters 3797 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3798 performance can be increased by more than a factor of 50. 3799 3800 Collective on MPI_Comm 3801 3802 Input Parameters: 3803 + A - the matrix 3804 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3805 (same value is used for all local rows) 3806 . d_nnz - array containing the number of nonzeros in the various rows of the 3807 DIAGONAL portion of the local submatrix (possibly different for each row) 3808 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3809 The size of this array is equal to the number of local rows, i.e 'm'. 3810 For matrices that will be factored, you must leave room for (and set) 3811 the diagonal entry even if it is zero. 3812 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3813 submatrix (same value is used for all local rows). 3814 - o_nnz - array containing the number of nonzeros in the various rows of the 3815 OFF-DIAGONAL portion of the local submatrix (possibly different for 3816 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3817 structure. The size of this array is equal to the number 3818 of local rows, i.e 'm'. 3819 3820 If the *_nnz parameter is given then the *_nz parameter is ignored 3821 3822 The AIJ format (also called the Yale sparse matrix format or 3823 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3824 storage. The stored row and column indices begin with zero. 3825 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3826 3827 The parallel matrix is partitioned such that the first m0 rows belong to 3828 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3829 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 
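
   A minimal calling sequence is sketched below; the communicator comm, the local
   size nlocal, and the constant per-row estimates of 5 (diagonal) and 2 (off-diagonal)
   nonzeros are illustrative placeholders only, not part of the interface. The
   paragraphs and the 8x8 example that follow explain how to choose accurate values.

.vb
      MatCreate(comm,&A);
      MatSetType(A,MATMPIAIJ);
      MatSetSizes(A,nlocal,nlocal,PETSC_DETERMINE,PETSC_DETERMINE);
      MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);
      ... set entries with MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd() ...
.ve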
3830 3831 The DIAGONAL portion of the local submatrix of a processor can be defined 3832 as the submatrix which is obtained by extraction the part corresponding to 3833 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3834 first row that belongs to the processor, r2 is the last row belonging to 3835 the this processor, and c1-c2 is range of indices of the local part of a 3836 vector suitable for applying the matrix to. This is an mxn matrix. In the 3837 common case of a square matrix, the row and column ranges are the same and 3838 the DIAGONAL part is also square. The remaining portion of the local 3839 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3840 3841 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3842 3843 You can call MatGetInfo() to get information on how effective the preallocation was; 3844 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3845 You can also run with the option -info and look for messages with the string 3846 malloc in them to see if additional memory allocation was needed. 3847 3848 Example usage: 3849 3850 Consider the following 8x8 matrix with 34 non-zero values, that is 3851 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3852 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3853 as follows: 3854 3855 .vb 3856 1 2 0 | 0 3 0 | 0 4 3857 Proc0 0 5 6 | 7 0 0 | 8 0 3858 9 0 10 | 11 0 0 | 12 0 3859 ------------------------------------- 3860 13 0 14 | 15 16 17 | 0 0 3861 Proc1 0 18 0 | 19 20 21 | 0 0 3862 0 0 0 | 22 23 0 | 24 0 3863 ------------------------------------- 3864 Proc2 25 26 27 | 0 0 28 | 29 0 3865 30 0 0 | 31 32 33 | 0 34 3866 .ve 3867 3868 This can be represented as a collection of submatrices as: 3869 3870 .vb 3871 A B C 3872 D E F 3873 G H I 3874 .ve 3875 3876 Where the submatrices A,B,C are owned by proc0, D,E,F are 3877 owned by proc1, G,H,I are owned by proc2. 3878 3879 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3880 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3881 The 'M','N' parameters are 8,8, and have the same values on all procs. 3882 3883 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3884 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3885 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3886 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3887 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3888 matrix, ans [DF] as another SeqAIJ matrix. 3889 3890 When d_nz, o_nz parameters are specified, d_nz storage elements are 3891 allocated for every row of the local diagonal submatrix, and o_nz 3892 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3893 One way to choose d_nz and o_nz is to use the max nonzerors per local 3894 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3895 In this case, the values of d_nz,o_nz are: 3896 .vb 3897 proc0 : dnz = 2, o_nz = 2 3898 proc1 : dnz = 3, o_nz = 2 3899 proc2 : dnz = 1, o_nz = 4 3900 .ve 3901 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3902 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3903 for proc3. i.e we are using 12+15+10=37 storage locations to store 3904 34 values. 3905 3906 When d_nnz, o_nnz parameters are specified, the storage is specified 3907 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 
3908 In the above case the values for d_nnz,o_nnz are: 3909 .vb 3910 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3911 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3912 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3913 .ve 3914 Here the space allocated is sum of all the above values i.e 34, and 3915 hence pre-allocation is perfect. 3916 3917 Level: intermediate 3918 3919 .keywords: matrix, aij, compressed row, sparse, parallel 3920 3921 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 3922 MPIAIJ, MatGetInfo() 3923 @*/ 3924 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3925 { 3926 PetscErrorCode ierr; 3927 3928 PetscFunctionBegin; 3929 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3930 PetscValidType(B,1); 3931 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3932 PetscFunctionReturn(0); 3933 } 3934 3935 #undef __FUNCT__ 3936 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3937 /*@ 3938 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3939 CSR format the local rows. 3940 3941 Collective on MPI_Comm 3942 3943 Input Parameters: 3944 + comm - MPI communicator 3945 . m - number of local rows (Cannot be PETSC_DECIDE) 3946 . n - This value should be the same as the local size used in creating the 3947 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3948 calculated if N is given) For square matrices n is almost always m. 3949 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3950 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3951 . i - row indices 3952 . j - column indices 3953 - a - matrix values 3954 3955 Output Parameter: 3956 . mat - the matrix 3957 3958 Level: intermediate 3959 3960 Notes: 3961 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3962 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3963 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3964 3965 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3966 3967 The format which is used for the sparse matrix input, is equivalent to a 3968 row-major ordering.. 
i.e for the following matrix, the input data expected is 3969 as shown: 3970 3971 1 0 0 3972 2 0 3 P0 3973 ------- 3974 4 5 6 P1 3975 3976 Process0 [P0]: rows_owned=[0,1] 3977 i = {0,1,3} [size = nrow+1 = 2+1] 3978 j = {0,0,2} [size = nz = 6] 3979 v = {1,2,3} [size = nz = 6] 3980 3981 Process1 [P1]: rows_owned=[2] 3982 i = {0,3} [size = nrow+1 = 1+1] 3983 j = {0,1,2} [size = nz = 6] 3984 v = {4,5,6} [size = nz = 6] 3985 3986 .keywords: matrix, aij, compressed row, sparse, parallel 3987 3988 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3989 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 3990 @*/ 3991 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3992 { 3993 PetscErrorCode ierr; 3994 3995 PetscFunctionBegin; 3996 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3997 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3998 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3999 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4000 /* ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); */ 4001 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4002 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4003 PetscFunctionReturn(0); 4004 } 4005 4006 #undef __FUNCT__ 4007 #define __FUNCT__ "MatCreateAIJ" 4008 /*@C 4009 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4010 (the default parallel PETSc format). For good matrix assembly performance 4011 the user should preallocate the matrix storage by setting the parameters 4012 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4013 performance can be increased by more than a factor of 50. 4014 4015 Collective on MPI_Comm 4016 4017 Input Parameters: 4018 + comm - MPI communicator 4019 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4020 This value should be the same as the local size used in creating the 4021 y vector for the matrix-vector product y = Ax. 4022 . n - This value should be the same as the local size used in creating the 4023 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4024 calculated if N is given) For square matrices n is almost always m. 4025 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4026 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4027 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4028 (same value is used for all local rows) 4029 . d_nnz - array containing the number of nonzeros in the various rows of the 4030 DIAGONAL portion of the local submatrix (possibly different for each row) 4031 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 4032 The size of this array is equal to the number of local rows, i.e 'm'. 4033 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4034 submatrix (same value is used for all local rows). 4035 - o_nnz - array containing the number of nonzeros in the various rows of the 4036 OFF-DIAGONAL portion of the local submatrix (possibly different for 4037 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 4038 structure. The size of this array is equal to the number 4039 of local rows, i.e 'm'. 4040 4041 Output Parameter: 4042 . 
A - the matrix 4043 4044 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4045 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4046 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4047 4048 Notes: 4049 If the *_nnz parameter is given then the *_nz parameter is ignored 4050 4051 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4052 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4053 storage requirements for this matrix. 4054 4055 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4056 processor than it must be used on all processors that share the object for 4057 that argument. 4058 4059 The user MUST specify either the local or global matrix dimensions 4060 (possibly both). 4061 4062 The parallel matrix is partitioned across processors such that the 4063 first m0 rows belong to process 0, the next m1 rows belong to 4064 process 1, the next m2 rows belong to process 2 etc.. where 4065 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4066 values corresponding to [m x N] submatrix. 4067 4068 The columns are logically partitioned with the n0 columns belonging 4069 to 0th partition, the next n1 columns belonging to the next 4070 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 4071 4072 The DIAGONAL portion of the local submatrix on any given processor 4073 is the submatrix corresponding to the rows and columns m,n 4074 corresponding to the given processor. i.e diagonal matrix on 4075 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4076 etc. The remaining portion of the local submatrix [m x (N-n)] 4077 constitute the OFF-DIAGONAL portion. The example below better 4078 illustrates this concept. 4079 4080 For a square global matrix we define each processor's diagonal portion 4081 to be its local rows and the corresponding columns (a square submatrix); 4082 each processor's off-diagonal portion encompasses the remainder of the 4083 local matrix (a rectangular submatrix). 4084 4085 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4086 4087 When calling this routine with a single process communicator, a matrix of 4088 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4089 type of communicator, use the construction mechanism: 4090 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4091 4092 By default, this format uses inodes (identical nodes) when possible. 4093 We search for consecutive rows with the same nonzero structure, thereby 4094 reusing matrix information to achieve increased efficiency. 4095 4096 Options Database Keys: 4097 + -mat_no_inode - Do not use inodes 4098 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4099 - -mat_aij_oneindex - Internally use indexing starting at 1 4100 rather than 0. Note that when calling MatSetValues(), 4101 the user still MUST index entries starting at 0! 4102 4103 4104 Example usage: 4105 4106 Consider the following 8x8 matrix with 34 non-zero values, that is 4107 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4108 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown
4109 as follows:
4110
4111 .vb
4112             1  2  0  |  0  3  0  |  0  4
4113    Proc0    0  5  6  |  7  0  0  |  8  0
4114             9  0 10  | 11  0  0  | 12  0
4115    -------------------------------------
4116            13  0 14  | 15 16 17  |  0  0
4117    Proc1    0 18  0  | 19 20 21  |  0  0
4118             0  0  0  | 22 23  0  | 24  0
4119    -------------------------------------
4120    Proc2   25 26 27  |  0  0 28  | 29  0
4121            30  0  0  | 31 32 33  |  0 34
4122 .ve
4123
4124 This can be represented as a collection of submatrices as:
4125
4126 .vb
4127       A  B  C
4128       D  E  F
4129       G  H  I
4130 .ve
4131
4132 where the submatrices A,B,C are owned by proc0, D,E,F are
4133 owned by proc1, and G,H,I are owned by proc2.
4134
4135 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4136 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4137 The 'M','N' parameters are 8,8, and have the same values on all procs.
4138
4139 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4140 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4141 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4142 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4143 part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ
4144 matrix, and [DF] as another SeqAIJ matrix.
4145
4146 When the d_nz, o_nz parameters are specified, d_nz storage elements are
4147 allocated for every row of the local diagonal submatrix, and o_nz
4148 storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4149 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4150 row of the local DIAGONAL and OFF-DIAGONAL submatrices.
4151 In this case, the values of d_nz,o_nz are:
4152 .vb
4153      proc0 : d_nz = 2, o_nz = 2
4154      proc1 : d_nz = 3, o_nz = 2
4155      proc2 : d_nz = 1, o_nz = 4
4156 .ve
4157 We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4158 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4159 for proc2, i.e. we are using 12+15+10=37 storage locations to store
4160 34 values.
4161
4162 When the d_nnz, o_nnz parameters are specified, the storage is specified
4163 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
4164 In the above case the values for d_nnz,o_nnz are:
4165 .vb
4166      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4167      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4168      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4169 .ve
4170 Here the space allocated is the sum of all the above values, i.e. 34, and
4171 hence pre-allocation is perfect.
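
   As a sketch (not a complete program), the corresponding call on proc1 for this
   example could look as follows; the array and communicator names are illustrative
   only, and passing 0 for d_nz and o_nz simply defers to the d_nnz/o_nnz arrays as
   noted above:

.vb
      PetscInt d_nnz[3] = {3,3,2}, o_nnz[3] = {2,1,1};
      Mat      A;
      MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve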
4172 4173 Level: intermediate 4174 4175 .keywords: matrix, aij, compressed row, sparse, parallel 4176 4177 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4178 MPIAIJ, MatCreateMPIAIJWithArrays() 4179 @*/ 4180 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4181 { 4182 PetscErrorCode ierr; 4183 PetscMPIInt size; 4184 4185 PetscFunctionBegin; 4186 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4187 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4188 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4189 if (size > 1) { 4190 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4191 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4192 } else { 4193 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4194 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4195 } 4196 PetscFunctionReturn(0); 4197 } 4198 4199 #undef __FUNCT__ 4200 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4201 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 4202 { 4203 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4204 4205 PetscFunctionBegin; 4206 *Ad = a->A; 4207 *Ao = a->B; 4208 *colmap = a->garray; 4209 PetscFunctionReturn(0); 4210 } 4211 4212 #undef __FUNCT__ 4213 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4214 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4215 { 4216 PetscErrorCode ierr; 4217 PetscInt i; 4218 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4219 4220 PetscFunctionBegin; 4221 if (coloring->ctype == IS_COLORING_GLOBAL) { 4222 ISColoringValue *allcolors,*colors; 4223 ISColoring ocoloring; 4224 4225 /* set coloring for diagonal portion */ 4226 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4227 4228 /* set coloring for off-diagonal portion */ 4229 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 4230 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4231 for (i=0; i<a->B->cmap->n; i++) { 4232 colors[i] = allcolors[a->garray[i]]; 4233 } 4234 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4235 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4236 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4237 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4238 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4239 ISColoringValue *colors; 4240 PetscInt *larray; 4241 ISColoring ocoloring; 4242 4243 /* set coloring for diagonal portion */ 4244 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4245 for (i=0; i<a->A->cmap->n; i++) { 4246 larray[i] = i + A->cmap->rstart; 4247 } 4248 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 4249 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4250 for (i=0; i<a->A->cmap->n; i++) { 4251 colors[i] = coloring->colors[larray[i]]; 4252 } 4253 ierr = PetscFree(larray);CHKERRQ(ierr); 4254 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4255 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4256 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4257 4258 /* set coloring for off-diagonal portion */ 4259 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4260 ierr = 
ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 4261 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4262 for (i=0; i<a->B->cmap->n; i++) { 4263 colors[i] = coloring->colors[larray[i]]; 4264 } 4265 ierr = PetscFree(larray);CHKERRQ(ierr); 4266 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4267 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4268 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4269 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4270 4271 PetscFunctionReturn(0); 4272 } 4273 4274 #if defined(PETSC_HAVE_ADIC) 4275 #undef __FUNCT__ 4276 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 4277 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 4278 { 4279 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4280 PetscErrorCode ierr; 4281 4282 PetscFunctionBegin; 4283 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 4284 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 4285 PetscFunctionReturn(0); 4286 } 4287 #endif 4288 4289 #undef __FUNCT__ 4290 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4291 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4292 { 4293 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4294 PetscErrorCode ierr; 4295 4296 PetscFunctionBegin; 4297 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4298 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4299 PetscFunctionReturn(0); 4300 } 4301 4302 #undef __FUNCT__ 4303 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4304 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4305 { 4306 PetscErrorCode ierr; 4307 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4308 PetscInt *indx; 4309 4310 PetscFunctionBegin; 4311 /* This routine will ONLY return MPIAIJ type matrix */ 4312 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4313 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4314 if (n == PETSC_DECIDE){ 4315 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4316 } 4317 /* Check sum(n) = N */ 4318 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4319 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4320 4321 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4322 rstart -= m; 4323 4324 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4325 for (i=0;i<m;i++) { 4326 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4327 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4328 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4329 } 4330 4331 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4332 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE); CHKERRQ(ierr); 4333 ierr = MatSetBlockSizes(*outmat,bs,cbs); CHKERRQ(ierr); 4334 ierr = MatSetType(*outmat,MATMPIAIJ); CHKERRQ(ierr); 4335 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4336 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4337 PetscFunctionReturn(0); 4338 } 4339 4340 #undef __FUNCT__ 4341 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4342 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4343 { 4344 PetscErrorCode ierr; 4345 PetscInt 
m,N,i,rstart,nnz,Ii; 4346 PetscInt *indx; 4347 PetscScalar *values; 4348 4349 PetscFunctionBegin; 4350 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4351 ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 4352 for (i=0;i<m;i++) { 4353 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4354 Ii = i + rstart; 4355 ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4356 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4357 } 4358 ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4359 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4360 PetscFunctionReturn(0); 4361 } 4362 4363 #undef __FUNCT__ 4364 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4365 /*@ 4366 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4367 matrices from each processor 4368 4369 Collective on MPI_Comm 4370 4371 Input Parameters: 4372 + comm - the communicators the parallel matrix will live on 4373 . inmat - the input sequential matrices 4374 . n - number of local columns (or PETSC_DECIDE) 4375 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4376 4377 Output Parameter: 4378 . outmat - the parallel matrix generated 4379 4380 Level: advanced 4381 4382 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4383 4384 @*/ 4385 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4386 { 4387 PetscErrorCode ierr; 4388 4389 PetscFunctionBegin; 4390 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4391 if (scall == MAT_INITIAL_MATRIX){ 4392 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4393 } 4394 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4395 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4396 PetscFunctionReturn(0); 4397 } 4398 4399 #undef __FUNCT__ 4400 #define __FUNCT__ "MatFileSplit" 4401 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4402 { 4403 PetscErrorCode ierr; 4404 PetscMPIInt rank; 4405 PetscInt m,N,i,rstart,nnz; 4406 size_t len; 4407 const PetscInt *indx; 4408 PetscViewer out; 4409 char *name; 4410 Mat B; 4411 const PetscScalar *values; 4412 4413 PetscFunctionBegin; 4414 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4415 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4416 /* Should this be the type of the diagonal block of A? 
*/ 4417 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4418 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4419 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 4420 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4421 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4422 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4423 for (i=0;i<m;i++) { 4424 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4425 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4426 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4427 } 4428 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4429 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4430 4431 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4432 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4433 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4434 sprintf(name,"%s.%d",outfile,rank); 4435 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4436 ierr = PetscFree(name); 4437 ierr = MatView(B,out);CHKERRQ(ierr); 4438 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4439 ierr = MatDestroy(&B);CHKERRQ(ierr); 4440 PetscFunctionReturn(0); 4441 } 4442 4443 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4444 #undef __FUNCT__ 4445 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4446 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4447 { 4448 PetscErrorCode ierr; 4449 Mat_Merge_SeqsToMPI *merge; 4450 PetscContainer container; 4451 4452 PetscFunctionBegin; 4453 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4454 if (container) { 4455 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4456 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4457 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4458 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4459 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4460 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4461 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4462 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4463 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4464 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4465 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4466 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4467 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4468 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4469 ierr = PetscFree(merge);CHKERRQ(ierr); 4470 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4471 } 4472 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4473 PetscFunctionReturn(0); 4474 } 4475 4476 #include <../src/mat/utils/freespace.h> 4477 #include <petscbt.h> 4478 4479 #undef __FUNCT__ 4480 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4481 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4482 { 4483 PetscErrorCode ierr; 4484 MPI_Comm comm=((PetscObject)mpimat)->comm; 4485 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4486 PetscMPIInt size,rank,taga,*len_s; 4487 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4488 PetscInt proc,m; 4489 PetscInt **buf_ri,**buf_rj; 4490 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4491 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4492 MPI_Request *s_waits,*r_waits; 4493 MPI_Status *status; 4494 MatScalar *aa=a->a; 4495 MatScalar **abuf_r,*ba_i; 4496 Mat_Merge_SeqsToMPI *merge; 4497 PetscContainer container; 4498 4499 PetscFunctionBegin; 4500 ierr = 
PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4501 4502 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4503 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4504 4505 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4506 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4507 4508 bi = merge->bi; 4509 bj = merge->bj; 4510 buf_ri = merge->buf_ri; 4511 buf_rj = merge->buf_rj; 4512 4513 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4514 owners = merge->rowmap->range; 4515 len_s = merge->len_s; 4516 4517 /* send and recv matrix values */ 4518 /*-----------------------------*/ 4519 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4520 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4521 4522 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4523 for (proc=0,k=0; proc<size; proc++){ 4524 if (!len_s[proc]) continue; 4525 i = owners[proc]; 4526 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4527 k++; 4528 } 4529 4530 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4531 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4532 ierr = PetscFree(status);CHKERRQ(ierr); 4533 4534 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4535 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4536 4537 /* insert mat values of mpimat */ 4538 /*----------------------------*/ 4539 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4540 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4541 4542 for (k=0; k<merge->nrecv; k++){ 4543 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4544 nrows = *(buf_ri_k[k]); 4545 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4546 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4547 } 4548 4549 /* set values of ba */ 4550 m = merge->rowmap->n; 4551 for (i=0; i<m; i++) { 4552 arow = owners[rank] + i; 4553 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4554 bnzi = bi[i+1] - bi[i]; 4555 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4556 4557 /* add local non-zero vals of this proc's seqmat into ba */ 4558 anzi = ai[arow+1] - ai[arow]; 4559 aj = a->j + ai[arow]; 4560 aa = a->a + ai[arow]; 4561 nextaj = 0; 4562 for (j=0; nextaj<anzi; j++){ 4563 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4564 ba_i[j] += aa[nextaj++]; 4565 } 4566 } 4567 4568 /* add received vals into ba */ 4569 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4570 /* i-th row */ 4571 if (i == *nextrow[k]) { 4572 anzi = *(nextai[k]+1) - *nextai[k]; 4573 aj = buf_rj[k] + *(nextai[k]); 4574 aa = abuf_r[k] + *(nextai[k]); 4575 nextaj = 0; 4576 for (j=0; nextaj<anzi; j++){ 4577 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4578 ba_i[j] += aa[nextaj++]; 4579 } 4580 } 4581 nextrow[k]++; nextai[k]++; 4582 } 4583 } 4584 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4585 } 4586 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4587 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4588 4589 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4590 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4591 ierr = 
PetscFree(ba_i);CHKERRQ(ierr); 4592 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4593 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4594 PetscFunctionReturn(0); 4595 } 4596 4597 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4598 4599 #undef __FUNCT__ 4600 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4601 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4602 { 4603 PetscErrorCode ierr; 4604 Mat B_mpi; 4605 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4606 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4607 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4608 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4609 PetscInt len,proc,*dnz,*onz,bs,cbs; 4610 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4611 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4612 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4613 MPI_Status *status; 4614 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4615 PetscBT lnkbt; 4616 Mat_Merge_SeqsToMPI *merge; 4617 PetscContainer container; 4618 4619 PetscFunctionBegin; 4620 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4621 4622 /* make sure it is a PETSc comm */ 4623 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4624 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4625 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4626 4627 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4628 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4629 4630 /* determine row ownership */ 4631 /*---------------------------------------------------------*/ 4632 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4633 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4634 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4635 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4636 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4637 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4638 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4639 4640 m = merge->rowmap->n; 4641 M = merge->rowmap->N; 4642 owners = merge->rowmap->range; 4643 4644 /* determine the number of messages to send, their lengths */ 4645 /*---------------------------------------------------------*/ 4646 len_s = merge->len_s; 4647 4648 len = 0; /* length of buf_si[] */ 4649 merge->nsend = 0; 4650 for (proc=0; proc<size; proc++){ 4651 len_si[proc] = 0; 4652 if (proc == rank){ 4653 len_s[proc] = 0; 4654 } else { 4655 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4656 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4657 } 4658 if (len_s[proc]) { 4659 merge->nsend++; 4660 nrows = 0; 4661 for (i=owners[proc]; i<owners[proc+1]; i++){ 4662 if (ai[i+1] > ai[i]) nrows++; 4663 } 4664 len_si[proc] = 2*(nrows+1); 4665 len += len_si[proc]; 4666 } 4667 } 4668 4669 /* determine the number and length of messages to receive for ij-structure */ 4670 /*-------------------------------------------------------------------------*/ 4671 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4672 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4673 4674 /* post the Irecv of j-structure */ 4675 /*-------------------------------*/ 4676 ierr = 
PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4677 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4678 4679 /* post the Isend of j-structure */ 4680 /*--------------------------------*/ 4681 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4682 4683 for (proc=0, k=0; proc<size; proc++){ 4684 if (!len_s[proc]) continue; 4685 i = owners[proc]; 4686 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4687 k++; 4688 } 4689 4690 /* receives and sends of j-structure are complete */ 4691 /*------------------------------------------------*/ 4692 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4693 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4694 4695 /* send and recv i-structure */ 4696 /*---------------------------*/ 4697 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4698 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4699 4700 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4701 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4702 for (proc=0,k=0; proc<size; proc++){ 4703 if (!len_s[proc]) continue; 4704 /* form outgoing message for i-structure: 4705 buf_si[0]: nrows to be sent 4706 [1:nrows]: row index (global) 4707 [nrows+1:2*nrows+1]: i-structure index 4708 */ 4709 /*-------------------------------------------*/ 4710 nrows = len_si[proc]/2 - 1; 4711 buf_si_i = buf_si + nrows+1; 4712 buf_si[0] = nrows; 4713 buf_si_i[0] = 0; 4714 nrows = 0; 4715 for (i=owners[proc]; i<owners[proc+1]; i++){ 4716 anzi = ai[i+1] - ai[i]; 4717 if (anzi) { 4718 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4719 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4720 nrows++; 4721 } 4722 } 4723 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4724 k++; 4725 buf_si += len_si[proc]; 4726 } 4727 4728 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4729 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4730 4731 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4732 for (i=0; i<merge->nrecv; i++){ 4733 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4734 } 4735 4736 ierr = PetscFree(len_si);CHKERRQ(ierr); 4737 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4738 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4739 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4740 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4741 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4742 ierr = PetscFree(status);CHKERRQ(ierr); 4743 4744 /* compute a local seq matrix in each processor */ 4745 /*----------------------------------------------*/ 4746 /* allocate bi array and free space for accumulating nonzero column info */ 4747 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4748 bi[0] = 0; 4749 4750 /* create and initialize a linked list */ 4751 nlnk = N+1; 4752 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4753 4754 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4755 len = 0; 4756 len = ai[owners[rank+1]] - ai[owners[rank]]; 4757 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4758 current_space = free_space; 4759 4760 /* determine 
symbolic info for each local row */
4761 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4762
4763 for (k=0; k<merge->nrecv; k++){
4764 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4765 nrows = *buf_ri_k[k];
4766 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th received i-structure */
4767 nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4768 }
4769
4770 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4771 len = 0;
4772 for (i=0;i<m;i++) {
4773 bnzi = 0;
4774 /* add local non-zero cols of this proc's seqmat into lnk */
4775 arow = owners[rank] + i;
4776 anzi = ai[arow+1] - ai[arow];
4777 aj = a->j + ai[arow];
4778 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4779 bnzi += nlnk;
4780 /* add received col data into lnk */
4781 for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4782 if (i == *nextrow[k]) { /* i-th row */
4783 anzi = *(nextai[k]+1) - *nextai[k];
4784 aj = buf_rj[k] + *nextai[k];
4785 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4786 bnzi += nlnk;
4787 nextrow[k]++; nextai[k]++;
4788 }
4789 }
4790 if (len < bnzi) len = bnzi; /* =max(bnzi) */
4791
4792 /* if free space is not available, make more free space */
4793 if (current_space->local_remaining<bnzi) {
4794 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4795 nspacedouble++;
4796 }
4797 /* copy data into free space, then initialize lnk */
4798 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4799 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4800
4801 current_space->array += bnzi;
4802 current_space->local_used += bnzi;
4803 current_space->local_remaining -= bnzi;
4804
4805 bi[i+1] = bi[i] + bnzi;
4806 }
4807
4808 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4809
4810 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4811 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4812 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4813
4814 /* create symbolic parallel matrix B_mpi */
4815 /*---------------------------------------*/
4816 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4817 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4818 if (n==PETSC_DECIDE) {
4819 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4820 } else {
4821 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4822 }
4823 ierr = MatSetBlockSizes(B_mpi,bs,cbs); CHKERRQ(ierr);
4824 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4825 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4826 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4827 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4828
4829 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4830 B_mpi->assembled = PETSC_FALSE;
4831 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4832 merge->bi = bi;
4833 merge->bj = bj;
4834 merge->buf_ri = buf_ri;
4835 merge->buf_rj = buf_rj;
4836 merge->coi = PETSC_NULL;
4837 merge->coj = PETSC_NULL;
4838 merge->owners_co = PETSC_NULL;
4839
4840 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4841
4842 /* attach the supporting struct to B_mpi for reuse */
4843 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4844 ierr =
PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4845 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4846 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
4847 *mpimat = B_mpi;
4848
4849 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4850 PetscFunctionReturn(0);
4851 }
4852
4853 #undef __FUNCT__
4854 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4855 /*@C
4856 MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding together the sequential
4857 matrices from each process
4858
4859 Collective on MPI_Comm
4860
4861 Input Parameters:
4862 + comm - the communicator the parallel matrix will live on
4863 . seqmat - the input sequential matrix (one per process)
4864 . m - number of local rows (or PETSC_DECIDE)
4865 . n - number of local columns (or PETSC_DECIDE)
4866 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4867
4868 Output Parameter:
4869 . mpimat - the parallel matrix generated
4870
4871 Level: advanced
4872
4873 Notes:
4874 The dimensions of the sequential matrix on each process MUST be the same.
4875 The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4876 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4877 @*/
4878 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4879 {
4880 PetscErrorCode ierr;
4881 PetscMPIInt size;
4882
4883 PetscFunctionBegin;
4884 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4885 if (size == 1){
4886 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4887 if (scall == MAT_INITIAL_MATRIX){
4888 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4889 } else {
4890 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4891 }
4892 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4893 PetscFunctionReturn(0);
4894 }
4895 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4896 if (scall == MAT_INITIAL_MATRIX){
4897 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4898 }
4899 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4900 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4901 PetscFunctionReturn(0);
4902 }
4903
4904 #undef __FUNCT__
4905 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4906 /*@
4907 MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4908 mlocal rows and n columns, where mlocal is the local row count obtained with MatGetLocalSize() and n is the global column count obtained
4909 with MatGetSize()
4910
4911 Not Collective
4912
4913 Input Parameters:
4914 + A - the matrix
4915 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4916
4917 Output Parameter:
4918 .
A_loc - the local sequential matrix generated 4919 4920 Level: developer 4921 4922 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4923 4924 @*/ 4925 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4926 { 4927 PetscErrorCode ierr; 4928 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4929 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4930 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4931 MatScalar *aa=a->a,*ba=b->a,*cam; 4932 PetscScalar *ca; 4933 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4934 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4935 PetscBool match; 4936 4937 PetscFunctionBegin; 4938 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4939 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4940 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4941 if (scall == MAT_INITIAL_MATRIX){ 4942 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4943 ci[0] = 0; 4944 for (i=0; i<am; i++){ 4945 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4946 } 4947 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4948 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4949 k = 0; 4950 for (i=0; i<am; i++) { 4951 ncols_o = bi[i+1] - bi[i]; 4952 ncols_d = ai[i+1] - ai[i]; 4953 /* off-diagonal portion of A */ 4954 for (jo=0; jo<ncols_o; jo++) { 4955 col = cmap[*bj]; 4956 if (col >= cstart) break; 4957 cj[k] = col; bj++; 4958 ca[k++] = *ba++; 4959 } 4960 /* diagonal portion of A */ 4961 for (j=0; j<ncols_d; j++) { 4962 cj[k] = cstart + *aj++; 4963 ca[k++] = *aa++; 4964 } 4965 /* off-diagonal portion of A */ 4966 for (j=jo; j<ncols_o; j++) { 4967 cj[k] = cmap[*bj++]; 4968 ca[k++] = *ba++; 4969 } 4970 } 4971 /* put together the new matrix */ 4972 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4973 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4974 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4975 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4976 mat->free_a = PETSC_TRUE; 4977 mat->free_ij = PETSC_TRUE; 4978 mat->nonew = 0; 4979 } else if (scall == MAT_REUSE_MATRIX){ 4980 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4981 ci = mat->i; cj = mat->j; cam = mat->a; 4982 for (i=0; i<am; i++) { 4983 /* off-diagonal portion of A */ 4984 ncols_o = bi[i+1] - bi[i]; 4985 for (jo=0; jo<ncols_o; jo++) { 4986 col = cmap[*bj]; 4987 if (col >= cstart) break; 4988 *cam++ = *ba++; bj++; 4989 } 4990 /* diagonal portion of A */ 4991 ncols_d = ai[i+1] - ai[i]; 4992 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4993 /* off-diagonal portion of A */ 4994 for (j=jo; j<ncols_o; j++) { 4995 *cam++ = *ba++; bj++; 4996 } 4997 } 4998 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4999 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5000 PetscFunctionReturn(0); 5001 } 5002 5003 #undef __FUNCT__ 5004 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5005 /*@C 5006 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5007 5008 Not Collective 5009 5010 Input Parameters: 5011 + A - the matrix 5012 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5013 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 5014 5015 Output Parameter: 5016 . 
A_loc - the local sequential matrix generated 5017 5018 Level: developer 5019 5020 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5021 5022 @*/ 5023 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5024 { 5025 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5026 PetscErrorCode ierr; 5027 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5028 IS isrowa,iscola; 5029 Mat *aloc; 5030 PetscBool match; 5031 5032 PetscFunctionBegin; 5033 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5034 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5035 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5036 if (!row){ 5037 start = A->rmap->rstart; end = A->rmap->rend; 5038 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5039 } else { 5040 isrowa = *row; 5041 } 5042 if (!col){ 5043 start = A->cmap->rstart; 5044 cmap = a->garray; 5045 nzA = a->A->cmap->n; 5046 nzB = a->B->cmap->n; 5047 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5048 ncols = 0; 5049 for (i=0; i<nzB; i++) { 5050 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5051 else break; 5052 } 5053 imark = i; 5054 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5055 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5056 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5057 } else { 5058 iscola = *col; 5059 } 5060 if (scall != MAT_INITIAL_MATRIX){ 5061 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5062 aloc[0] = *A_loc; 5063 } 5064 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5065 *A_loc = aloc[0]; 5066 ierr = PetscFree(aloc);CHKERRQ(ierr); 5067 if (!row){ 5068 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5069 } 5070 if (!col){ 5071 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5072 } 5073 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5074 PetscFunctionReturn(0); 5075 } 5076 5077 #undef __FUNCT__ 5078 #define __FUNCT__ "MatGetBrowsOfAcols" 5079 /*@C 5080 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5081 5082 Collective on Mat 5083 5084 Input Parameters: 5085 + A,B - the matrices in mpiaij format 5086 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5087 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 5088 5089 Output Parameter: 5090 + rowb, colb - index sets of rows and columns of B to extract 5091 - B_seq - the sequential matrix generated 5092 5093 Level: developer 5094 5095 @*/ 5096 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5097 { 5098 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5099 PetscErrorCode ierr; 5100 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5101 IS isrowb,iscolb; 5102 Mat *bseq=PETSC_NULL; 5103 5104 PetscFunctionBegin; 5105 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5106 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5107 } 5108 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5109 5110 if (scall == MAT_INITIAL_MATRIX){ 5111 start = A->cmap->rstart; 5112 cmap = a->garray; 5113 nzA = a->A->cmap->n; 5114 nzB = a->B->cmap->n; 5115 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5116 ncols = 0; 5117 for (i=0; i<nzB; i++) { /* row < local row index */ 5118 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5119 else break; 5120 } 5121 imark = i; 5122 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5123 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5124 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5125 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5126 } else { 5127 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5128 isrowb = *rowb; iscolb = *colb; 5129 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5130 bseq[0] = *B_seq; 5131 } 5132 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5133 *B_seq = bseq[0]; 5134 ierr = PetscFree(bseq);CHKERRQ(ierr); 5135 if (!rowb){ 5136 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5137 } else { 5138 *rowb = isrowb; 5139 } 5140 if (!colb){ 5141 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5142 } else { 5143 *colb = iscolb; 5144 } 5145 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5146 PetscFunctionReturn(0); 5147 } 5148 5149 #undef __FUNCT__ 5150 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5151 /* 5152 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5153 of the OFF-DIAGONAL portion of local A 5154 5155 Collective on Mat 5156 5157 Input Parameters: 5158 + A,B - the matrices in mpiaij format 5159 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5160 5161 Output Parameter: 5162 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5163 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5164 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 5165 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5166 5167 Level: developer 5168 5169 */ 5170 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5171 { 5172 VecScatter_MPI_General *gen_to,*gen_from; 5173 PetscErrorCode ierr; 5174 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5175 Mat_SeqAIJ *b_oth; 5176 VecScatter ctx=a->Mvctx; 5177 MPI_Comm comm=((PetscObject)ctx)->comm; 5178 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5179 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5180 PetscScalar *rvalues,*svalues; 5181 MatScalar *b_otha,*bufa,*bufA; 5182 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5183 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 5184 MPI_Status *sstatus,rstatus; 5185 PetscMPIInt jj; 5186 PetscInt *cols,sbs,rbs; 5187 PetscScalar *vals; 5188 5189 PetscFunctionBegin; 5190 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5191 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5192 } 5193 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5194 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5195 5196 gen_to = (VecScatter_MPI_General*)ctx->todata; 5197 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5198 rvalues = gen_from->values; /* holds the length of receiving row */ 5199 svalues = gen_to->values; /* holds the length of sending row */ 5200 nrecvs = gen_from->n; 5201 nsends = gen_to->n; 5202 5203 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5204 srow = gen_to->indices; /* local row index to be sent */ 5205 sstarts = gen_to->starts; 5206 sprocs = gen_to->procs; 5207 sstatus = gen_to->sstatus; 5208 sbs = gen_to->bs; 5209 rstarts = gen_from->starts; 5210 rprocs = gen_from->procs; 5211 rbs = gen_from->bs; 5212 5213 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5214 if (scall == MAT_INITIAL_MATRIX){ 5215 /* i-array */ 5216 /*---------*/ 5217 /* post receives */ 5218 for (i=0; i<nrecvs; i++){ 5219 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5220 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5221 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5222 } 5223 5224 /* pack the outgoing message */ 5225 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5226 sstartsj[0] = 0; rstartsj[0] = 0; 5227 len = 0; /* total length of j or a array to be sent */ 5228 k = 0; 5229 for (i=0; i<nsends; i++){ 5230 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5231 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5232 for (j=0; j<nrows; j++) { 5233 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5234 for (l=0; l<sbs; l++){ 5235 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 5236 rowlen[j*sbs+l] = ncols; 5237 len += ncols; 5238 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 5239 } 5240 k++; 5241 } 5242 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5243 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5244 } 5245 /* 
recvs and sends of i-array are completed */ 5246 i = nrecvs; 5247 while (i--) { 5248 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5249 } 5250 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5251 5252 /* allocate buffers for sending j and a arrays */ 5253 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5254 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5255 5256 /* create i-array of B_oth */ 5257 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5258 b_othi[0] = 0; 5259 len = 0; /* total length of j or a array to be received */ 5260 k = 0; 5261 for (i=0; i<nrecvs; i++){ 5262 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5263 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5264 for (j=0; j<nrows; j++) { 5265 b_othi[k+1] = b_othi[k] + rowlen[j]; 5266 len += rowlen[j]; k++; 5267 } 5268 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5269 } 5270 5271 /* allocate space for j and a arrrays of B_oth */ 5272 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5273 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5274 5275 /* j-array */ 5276 /*---------*/ 5277 /* post receives of j-array */ 5278 for (i=0; i<nrecvs; i++){ 5279 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5280 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5281 } 5282 5283 /* pack the outgoing message j-array */ 5284 k = 0; 5285 for (i=0; i<nsends; i++){ 5286 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5287 bufJ = bufj+sstartsj[i]; 5288 for (j=0; j<nrows; j++) { 5289 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5290 for (ll=0; ll<sbs; ll++){ 5291 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5292 for (l=0; l<ncols; l++){ 5293 *bufJ++ = cols[l]; 5294 } 5295 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5296 } 5297 } 5298 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5299 } 5300 5301 /* recvs and sends of j-array are completed */ 5302 i = nrecvs; 5303 while (i--) { 5304 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5305 } 5306 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5307 } else if (scall == MAT_REUSE_MATRIX){ 5308 sstartsj = *startsj_s; 5309 rstartsj = *startsj_r; 5310 bufa = *bufa_ptr; 5311 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5312 b_otha = b_oth->a; 5313 } else { 5314 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5315 } 5316 5317 /* a-array */ 5318 /*---------*/ 5319 /* post receives of a-array */ 5320 for (i=0; i<nrecvs; i++){ 5321 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5322 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5323 } 5324 5325 /* pack the outgoing message a-array */ 5326 k = 0; 5327 for (i=0; i<nsends; i++){ 5328 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5329 bufA = bufa+sstartsj[i]; 5330 for (j=0; j<nrows; j++) { 5331 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5332 for (ll=0; ll<sbs; ll++){ 5333 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5334 for (l=0; l<ncols; l++){ 5335 *bufA++ = vals[l]; 5336 } 5337 ierr = 
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5338 } 5339 } 5340 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5341 } 5342 /* recvs and sends of a-array are completed */ 5343 i = nrecvs; 5344 while (i--) { 5345 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5346 } 5347 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5348 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5349 5350 if (scall == MAT_INITIAL_MATRIX){ 5351 /* put together the new matrix */ 5352 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5353 5354 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5355 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5356 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5357 b_oth->free_a = PETSC_TRUE; 5358 b_oth->free_ij = PETSC_TRUE; 5359 b_oth->nonew = 0; 5360 5361 ierr = PetscFree(bufj);CHKERRQ(ierr); 5362 if (!startsj_s || !bufa_ptr){ 5363 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5364 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5365 } else { 5366 *startsj_s = sstartsj; 5367 *startsj_r = rstartsj; 5368 *bufa_ptr = bufa; 5369 } 5370 } 5371 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5372 PetscFunctionReturn(0); 5373 } 5374 5375 #undef __FUNCT__ 5376 #define __FUNCT__ "MatGetCommunicationStructs" 5377 /*@C 5378 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5379 5380 Not Collective 5381 5382 Input Parameters: 5383 . A - The matrix in mpiaij format 5384 5385 Output Parameter: 5386 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5387 . 
colmap - A map from global column index to local index into lvec 5388 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5389 5390 Level: developer 5391 5392 @*/ 5393 #if defined (PETSC_USE_CTABLE) 5394 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5395 #else 5396 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5397 #endif 5398 { 5399 Mat_MPIAIJ *a; 5400 5401 PetscFunctionBegin; 5402 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5403 PetscValidPointer(lvec, 2); 5404 PetscValidPointer(colmap, 3); 5405 PetscValidPointer(multScatter, 4); 5406 a = (Mat_MPIAIJ *) A->data; 5407 if (lvec) *lvec = a->lvec; 5408 if (colmap) *colmap = a->colmap; 5409 if (multScatter) *multScatter = a->Mvctx; 5410 PetscFunctionReturn(0); 5411 } 5412 5413 EXTERN_C_BEGIN 5414 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*); 5415 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*); 5416 extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*); 5417 EXTERN_C_END 5418 5419 #undef __FUNCT__ 5420 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5421 /* 5422 Computes (B'*A')' since computing B*A directly is untenable 5423 5424 n p p 5425 ( ) ( ) ( ) 5426 m ( A ) * n ( B ) = m ( C ) 5427 ( ) ( ) ( ) 5428 5429 */ 5430 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5431 { 5432 PetscErrorCode ierr; 5433 Mat At,Bt,Ct; 5434 5435 PetscFunctionBegin; 5436 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5437 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5438 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5439 ierr = MatDestroy(&At);CHKERRQ(ierr); 5440 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5441 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5442 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5443 PetscFunctionReturn(0); 5444 } 5445 5446 #undef __FUNCT__ 5447 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5448 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5449 { 5450 PetscErrorCode ierr; 5451 PetscInt m=A->rmap->n,n=B->cmap->n; 5452 Mat Cmat; 5453 5454 PetscFunctionBegin; 5455 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5456 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 5457 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5458 ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 5459 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5460 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 5461 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5462 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5463 *C = Cmat; 5464 (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ; 5465 PetscFunctionReturn(0); 5466 } 5467 5468 /* ----------------------------------------------------------------*/ 5469 #undef __FUNCT__ 5470 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5471 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5472 { 5473 PetscErrorCode ierr; 5474 5475 PetscFunctionBegin; 5476 if (scall == MAT_INITIAL_MATRIX){ 5477 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5478 } 5479 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 
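  /* At this point C holds A*B: the numeric routine above forms it as (B^T A^T)^T, creating and
     destroying the two transposed matrices and the intermediate product on every call. */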
5480 PetscFunctionReturn(0); 5481 } 5482 5483 EXTERN_C_BEGIN 5484 #if defined(PETSC_HAVE_MUMPS) 5485 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5486 #endif 5487 #if defined(PETSC_HAVE_PASTIX) 5488 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5489 #endif 5490 #if defined(PETSC_HAVE_SUPERLU_DIST) 5491 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5492 #endif 5493 #if defined(PETSC_HAVE_SPOOLES) 5494 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5495 #endif 5496 EXTERN_C_END 5497 5498 /*MC 5499 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5500 5501 Options Database Keys: 5502 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5503 5504 Level: beginner 5505 5506 .seealso: MatCreateAIJ() 5507 M*/ 5508 5509 EXTERN_C_BEGIN 5510 #undef __FUNCT__ 5511 #define __FUNCT__ "MatCreate_MPIAIJ" 5512 PetscErrorCode MatCreate_MPIAIJ(Mat B) 5513 { 5514 Mat_MPIAIJ *b; 5515 PetscErrorCode ierr; 5516 PetscMPIInt size; 5517 5518 PetscFunctionBegin; 5519 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5520 5521 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5522 B->data = (void*)b; 5523 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5524 B->assembled = PETSC_FALSE; 5525 5526 B->insertmode = NOT_SET_VALUES; 5527 b->size = size; 5528 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5529 5530 /* build cache for off array entries formed */ 5531 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5532 b->donotstash = PETSC_FALSE; 5533 b->colmap = 0; 5534 b->garray = 0; 5535 b->roworiented = PETSC_TRUE; 5536 5537 /* stuff used for matrix vector multiply */ 5538 b->lvec = PETSC_NULL; 5539 b->Mvctx = PETSC_NULL; 5540 5541 /* stuff for MatGetRow() */ 5542 b->rowindices = 0; 5543 b->rowvalues = 0; 5544 b->getrowactive = PETSC_FALSE; 5545 5546 #if defined(PETSC_HAVE_TXPETSCGPU) 5547 /* stuff for GPU capabilities */ 5548 b->diagGPUMatFormat = MAT_CSR; 5549 b->offdiagGPUMatFormat = MAT_CSR; 5550 #endif 5551 5552 #if defined(PETSC_HAVE_SPOOLES) 5553 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 5554 "MatGetFactor_mpiaij_spooles", 5555 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5556 #endif 5557 #if defined(PETSC_HAVE_MUMPS) 5558 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5559 "MatGetFactor_aij_mumps", 5560 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5561 #endif 5562 #if defined(PETSC_HAVE_PASTIX) 5563 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5564 "MatGetFactor_mpiaij_pastix", 5565 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5566 #endif 5567 #if defined(PETSC_HAVE_SUPERLU_DIST) 5568 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5569 "MatGetFactor_mpiaij_superlu_dist", 5570 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5571 #endif 5572 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5573 "MatStoreValues_MPIAIJ", 5574 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5575 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5576 "MatRetrieveValues_MPIAIJ", 5577 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5578 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5579 "MatGetDiagonalBlock_MPIAIJ", 5580 
MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5581 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5582 "MatIsTranspose_MPIAIJ", 5583 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5584 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5585 "MatMPIAIJSetPreallocation_MPIAIJ", 5586 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5587 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5588 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5589 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5590 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5591 "MatDiagonalScaleLocal_MPIAIJ", 5592 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5593 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C", 5594 "MatConvert_MPIAIJ_MPIAIJPERM", 5595 MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5596 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C", 5597 "MatConvert_MPIAIJ_MPIAIJCRL", 5598 MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5599 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5600 "MatConvert_MPIAIJ_MPISBAIJ", 5601 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5602 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5603 "MatMatMult_MPIDense_MPIAIJ", 5604 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5605 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5606 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5607 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5608 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5609 "MatMatMultNumeric_MPIDense_MPIAIJ", 5610 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5611 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5612 PetscFunctionReturn(0); 5613 } 5614 EXTERN_C_END 5615 5616 #undef __FUNCT__ 5617 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5618 /*@ 5619 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5620 and "off-diagonal" part of the matrix in CSR format. 5621 5622 Collective on MPI_Comm 5623 5624 Input Parameters: 5625 + comm - MPI communicator 5626 . m - number of local rows (Cannot be PETSC_DECIDE) 5627 . n - This value should be the same as the local size used in creating the 5628 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5629 calculated if N is given) For square matrices n is almost always m. 5630 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5631 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5632 . i - row indices for "diagonal" portion of matrix 5633 . j - column indices 5634 . a - matrix values 5635 . oi - row indices for "off-diagonal" portion of matrix 5636 . oj - column indices 5637 - oa - matrix values 5638 5639 Output Parameter: 5640 . mat - the matrix 5641 5642 Level: advanced 5643 5644 Notes: 5645 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5646 must free the arrays once the matrix has been destroyed and not before. 5647 5648 The i and j indices are 0 based 5649 5650 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5651 5652 This sets local rows and cannot be used to set off-processor values. 
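   A minimal calling sketch (illustrative only; the CSR arrays are assumed to have already been built by
   the caller, with i,j,a describing the m by n "diagonal" block using local column indices and oi,oj,oa
   describing the "off-diagonal" block using global column indices, since the arrays are handed directly
   to MatCreateSeqAIJWithArrays() for the two blocks below):

      Mat A;
      ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,
                                            i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);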
5653 5654 You cannot later use MatSetValues() to change values in this matrix. 5655 5656 .keywords: matrix, aij, compressed row, sparse, parallel 5657 5658 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5659 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5660 @*/ 5661 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5662 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5663 { 5664 PetscErrorCode ierr; 5665 Mat_MPIAIJ *maij; 5666 5667 PetscFunctionBegin; 5668 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5669 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5670 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5671 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5672 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5673 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5674 maij = (Mat_MPIAIJ*) (*mat)->data; 5675 maij->donotstash = PETSC_TRUE; 5676 (*mat)->preallocated = PETSC_TRUE; 5677 5678 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5679 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5680 5681 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5682 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5683 5684 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5685 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5686 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5687 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5688 5689 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5690 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5691 PetscFunctionReturn(0); 5692 } 5693 5694 /* 5695 Special version for direct calls from Fortran 5696 */ 5697 #include <petsc-private/fortranimpl.h> 5698 5699 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5700 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5701 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5702 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5703 #endif 5704 5705 /* Change these macros so can be used in void function */ 5706 #undef CHKERRQ 5707 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5708 #undef SETERRQ2 5709 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5710 #undef SETERRQ3 5711 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5712 #undef SETERRQ 5713 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5714 5715 EXTERN_C_BEGIN 5716 #undef __FUNCT__ 5717 #define __FUNCT__ "matsetvaluesmpiaij_" 5718 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5719 { 5720 Mat mat = *mmat; 5721 PetscInt m = *mm, n = *mn; 5722 InsertMode addv = *maddv; 5723 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5724 PetscScalar value; 5725 PetscErrorCode ierr; 5726 5727 MatCheckPreallocated(mat,1); 5728 if (mat->insertmode == NOT_SET_VALUES) { 5729 mat->insertmode = addv; 5730 } 5731 #if defined(PETSC_USE_DEBUG) 5732 else if (mat->insertmode != addv) { 5733 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5734 } 5735 #endif 5736 { 5737 PetscInt 
i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5738 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5739 PetscBool roworiented = aij->roworiented; 5740 5741 /* Some Variables required in the macro */ 5742 Mat A = aij->A; 5743 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5744 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5745 MatScalar *aa = a->a; 5746 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5747 Mat B = aij->B; 5748 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5749 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5750 MatScalar *ba = b->a; 5751 5752 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5753 PetscInt nonew = a->nonew; 5754 MatScalar *ap1,*ap2; 5755 5756 PetscFunctionBegin; 5757 for (i=0; i<m; i++) { 5758 if (im[i] < 0) continue; 5759 #if defined(PETSC_USE_DEBUG) 5760 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5761 #endif 5762 if (im[i] >= rstart && im[i] < rend) { 5763 row = im[i] - rstart; 5764 lastcol1 = -1; 5765 rp1 = aj + ai[row]; 5766 ap1 = aa + ai[row]; 5767 rmax1 = aimax[row]; 5768 nrow1 = ailen[row]; 5769 low1 = 0; 5770 high1 = nrow1; 5771 lastcol2 = -1; 5772 rp2 = bj + bi[row]; 5773 ap2 = ba + bi[row]; 5774 rmax2 = bimax[row]; 5775 nrow2 = bilen[row]; 5776 low2 = 0; 5777 high2 = nrow2; 5778 5779 for (j=0; j<n; j++) { 5780 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5781 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5782 if (in[j] >= cstart && in[j] < cend){ 5783 col = in[j] - cstart; 5784 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5785 } else if (in[j] < 0) continue; 5786 #if defined(PETSC_USE_DEBUG) 5787 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5788 #endif 5789 else { 5790 if (mat->was_assembled) { 5791 if (!aij->colmap) { 5792 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5793 } 5794 #if defined (PETSC_USE_CTABLE) 5795 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5796 col--; 5797 #else 5798 col = aij->colmap[in[j]] - 1; 5799 #endif 5800 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5801 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5802 col = in[j]; 5803 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5804 B = aij->B; 5805 b = (Mat_SeqAIJ*)B->data; 5806 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5807 rp2 = bj + bi[row]; 5808 ap2 = ba + bi[row]; 5809 rmax2 = bimax[row]; 5810 nrow2 = bilen[row]; 5811 low2 = 0; 5812 high2 = nrow2; 5813 bm = aij->B->rmap->n; 5814 ba = b->a; 5815 } 5816 } else col = in[j]; 5817 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5818 } 5819 } 5820 } else { 5821 if (!aij->donotstash) { 5822 if (roworiented) { 5823 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5824 } else { 5825 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5826 } 5827 } 5828 } 5829 }} 5830 PetscFunctionReturnVoid(); 5831 } 5832 EXTERN_C_END 5833 5834
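/*
   Usage sketch for MatCreateMPIAIJSumSeqAIJ() (defined earlier in this file). Illustrative only:
   "seqloc" and "par" are placeholder names, and each process is assumed to have assembled a SeqAIJ
   matrix of identical global dimensions, as required by the manual page for that routine.

      Mat seqloc,par;
      ...
      ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqloc,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&par);CHKERRQ(ierr);
      ...   change numerical values of seqloc, keeping the same nonzero pattern   ...
      ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqloc,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&par);CHKERRQ(ierr);
      ierr = MatDestroy(&par);CHKERRQ(ierr);
*/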