1 2 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 3 #include <petscblaslapack.h> 4 5 /*MC 6 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 7 8 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 9 and MATMPIAIJ otherwise. As a result, for single process communicators, 10 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported 11 for communicators controlling multiple processes. It is recommended that you call both of 12 the above preallocation routines for simplicity. 13 14 Options Database Keys: 15 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 16 17 Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when 18 enough exist. 19 20 Level: beginner 21 22 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ 23 M*/ 24 25 /*MC 26 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 27 28 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 29 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 30 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 31 for communicators controlling multiple processes. It is recommended that you call both of 32 the above preallocation routines for simplicity. 33 34 Options Database Keys: 35 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 36 37 Level: beginner 38 39 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 40 M*/ 41 42 #undef __FUNCT__ 43 #define __FUNCT__ "MatFindNonZeroRows_MPIAIJ" 44 PetscErrorCode MatFindNonZeroRows_MPIAIJ(Mat M,IS *keptrows) 45 { 46 PetscErrorCode ierr; 47 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 48 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 49 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 50 const PetscInt *ia,*ib; 51 const MatScalar *aa,*bb; 52 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 53 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 54 55 PetscFunctionBegin; 56 *keptrows = 0; 57 ia = a->i; 58 ib = b->i; 59 for (i=0; i<m; i++) { 60 na = ia[i+1] - ia[i]; 61 nb = ib[i+1] - ib[i]; 62 if (!na && !nb) { 63 cnt++; 64 goto ok1; 65 } 66 aa = a->a + ia[i]; 67 for (j=0; j<na; j++) { 68 if (aa[j] != 0.0) goto ok1; 69 } 70 bb = b->a + ib[i]; 71 for (j=0; j <nb; j++) { 72 if (bb[j] != 0.0) goto ok1; 73 } 74 cnt++; 75 ok1:; 76 } 77 ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,((PetscObject)M)->comm);CHKERRQ(ierr); 78 if (!n0rows) PetscFunctionReturn(0); 79 ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr); 80 cnt = 0; 81 for (i=0; i<m; i++) { 82 na = ia[i+1] - ia[i]; 83 nb = ib[i+1] - ib[i]; 84 if (!na && !nb) continue; 85 aa = a->a + ia[i]; 86 for(j=0; j<na;j++) { 87 if (aa[j] != 0.0) { 88 rows[cnt++] = rstart + i; 89 goto ok2; 90 } 91 } 92 bb = b->a + ib[i]; 93 for (j=0; j<nb; j++) { 94 if (bb[j] != 0.0) { 95 rows[cnt++] = rstart + i; 96 goto ok2; 97 } 98 } 99 ok2:; 100 } 101 ierr = ISCreateGeneral(((PetscObject)M)->comm,cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 102 PetscFunctionReturn(0); 103 } 104 105 #undef __FUNCT__ 106 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 107 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 108 { 109 PetscErrorCode ierr; 110 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 111 PetscInt i,n,*garray = aij->garray; 112 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 113 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 114 PetscReal *work; 115 116 PetscFunctionBegin; 117 ierr = MatGetSize(A,PETSC_NULL,&n);CHKERRQ(ierr); 118 ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr); 119 ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr); 120 if (type == NORM_2) { 121 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 122 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 123 } 124 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 125 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 126 } 127 } else if (type == NORM_1) { 128 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 129 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 130 } 131 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 132 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 133 } 134 } else if (type == NORM_INFINITY) { 135 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 136 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 137 } 138 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 139 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 140 } 141 142 } else SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Unknown NormType"); 143 if (type == NORM_INFINITY) { 144 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr); 145 } else { 146 ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr); 147 } 148 ierr = PetscFree(work);CHKERRQ(ierr); 149 if (type == NORM_2) { 150 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 151 } 152 PetscFunctionReturn(0); 153 } 154 155 #undef __FUNCT__ 156 #define __FUNCT__ "MatDistribute_MPIAIJ" 157 /* 158 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 159 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 160 161 Only for square matrices 162 */ 163 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 164 { 165 PetscMPIInt rank,size; 166 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld; 167 PetscErrorCode ierr; 168 Mat mat; 169 Mat_SeqAIJ *gmata; 170 PetscMPIInt tag; 171 MPI_Status status; 172 PetscBool aij; 173 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 174 175 PetscFunctionBegin; 176 CHKMEMQ; 177 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 178 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 179 if (!rank) { 180 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 181 if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 182 } 183 if (reuse == MAT_INITIAL_MATRIX) { 184 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 185 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 186 ierr = MatSetBlockSizes(mat,gmat->rmap->bs,gmat->cmap->bs);CHKERRQ(ierr); 187 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 188 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 189 ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr); 190 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 191 rowners[0] = 0; 192 for (i=2; i<=size; i++) { 193 rowners[i] += rowners[i-1]; 194 } 195 rstart = rowners[rank]; 196 rend = rowners[rank+1]; 197 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 198 if (!rank) { 199 gmata = (Mat_SeqAIJ*) gmat->data; 200 /* send row lengths to all processors */ 201 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 202 for (i=1; i<size; i++) { 203 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 204 } 205 /* determine number diagonal and off-diagonal counts */ 206 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 207 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 208 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 209 jj = 0; 210 for (i=0; i<m; i++) { 211 for (j=0; j<dlens[i]; j++) { 212 if (gmata->j[jj] < rstart) ld[i]++; 213 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 214 jj++; 215 } 216 } 217 /* send column indices to other processes */ 218 for (i=1; i<size; i++) { 219 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 220 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 221 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 222 } 223 224 /* send numerical values to other processes */ 225 for (i=1; i<size; i++) { 226 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 227 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 228 } 229 gmataa = gmata->a; 230 gmataj = gmata->j; 231 232 } else { 233 /* receive row lengths */ 234 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 235 /* receive column indices */ 236 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 237 ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr); 238 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 239 /* determine number diagonal and off-diagonal counts */ 240 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 241 ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr); 242 ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr); 243 jj = 0; 244 for (i=0; i<m; i++) { 245 for (j=0; j<dlens[i]; j++) { 246 if (gmataj[jj] < rstart) ld[i]++; 247 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 248 jj++; 249 } 250 } 251 /* receive numerical values */ 252 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 253 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 254 } 255 /* set preallocation */ 256 for (i=0; i<m; i++) { 257 dlens[i] -= olens[i]; 258 } 259 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 260 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 261 262 for (i=0; i<m; i++) { 263 dlens[i] += olens[i]; 264 } 265 cnt = 0; 266 for (i=0; i<m; i++) { 267 row = rstart + i; 268 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 269 cnt += dlens[i]; 270 } 271 if (rank) { 272 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 273 } 274 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 275 ierr = PetscFree(rowners);CHKERRQ(ierr); 276 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 277 *inmat = mat; 278 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 279 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 280 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 281 mat = *inmat; 282 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 283 if (!rank) { 284 /* send numerical values to other processes */ 285 gmata = (Mat_SeqAIJ*) gmat->data; 286 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 287 gmataa = gmata->a; 288 for (i=1; i<size; i++) { 289 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 290 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 291 } 292 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 293 } else { 294 /* receive numerical values from process 0*/ 295 nz = Ad->nz + Ao->nz; 296 ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 297 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 298 } 299 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 300 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 301 ad = Ad->a; 302 ao = Ao->a; 303 if (mat->rmap->n) { 304 i = 0; 305 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 306 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 307 } 308 for (i=1; i<mat->rmap->n; i++) { 309 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 310 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 311 } 312 i--; 313 if (mat->rmap->n) { 314 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 315 } 316 if (rank) { 317 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 318 } 319 } 320 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 321 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 322 CHKMEMQ; 323 PetscFunctionReturn(0); 324 } 325 326 /* 327 Local utility routine that creates a mapping from the global column 328 number to the local number in the off-diagonal part of the local 329 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 330 a slightly higher hash table cost; without it it is not scalable (each processor 331 has an order N integer array but is fast to acess. 332 */ 333 #undef __FUNCT__ 334 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 335 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 336 { 337 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 338 PetscErrorCode ierr; 339 PetscInt n = aij->B->cmap->n,i; 340 341 PetscFunctionBegin; 342 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 343 #if defined (PETSC_USE_CTABLE) 344 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 345 for (i=0; i<n; i++){ 346 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 347 } 348 #else 349 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr); 350 ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 351 ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 352 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 353 #endif 354 PetscFunctionReturn(0); 355 } 356 357 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \ 358 { \ 359 if (col <= lastcol1) low1 = 0; else high1 = nrow1; \ 360 lastcol1 = col;\ 361 while (high1-low1 > 5) { \ 362 t = (low1+high1)/2; \ 363 if (rp1[t] > col) high1 = t; \ 364 else low1 = t; \ 365 } \ 366 for (_i=low1; _i<high1; _i++) { \ 367 if (rp1[_i] > col) break; \ 368 if (rp1[_i] == col) { \ 369 if (addv == ADD_VALUES) ap1[_i] += value; \ 370 else ap1[_i] = value; \ 371 goto a_noinsert; \ 372 } \ 373 } \ 374 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 375 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 376 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 377 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 378 N = nrow1++ - 1; a->nz++; high1++; \ 379 /* shift up all the later entries in this row */ \ 380 for (ii=N; ii>=_i; ii--) { \ 381 rp1[ii+1] = rp1[ii]; \ 382 ap1[ii+1] = ap1[ii]; \ 383 } \ 384 rp1[_i] = col; \ 385 ap1[_i] = value; \ 386 a_noinsert: ; \ 387 ailen[row] = nrow1; \ 388 } 389 390 391 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \ 392 { \ 393 if (col <= lastcol2) low2 = 0; else high2 = nrow2; \ 394 lastcol2 = col;\ 395 while (high2-low2 > 5) { \ 396 t = (low2+high2)/2; \ 397 if (rp2[t] > col) high2 = t; \ 398 else low2 = t; \ 399 } \ 400 for (_i=low2; _i<high2; _i++) { \ 401 if (rp2[_i] > col) break; \ 402 if (rp2[_i] == col) { \ 403 if (addv == ADD_VALUES) ap2[_i] += value; \ 404 else ap2[_i] = value; \ 405 goto b_noinsert; \ 406 } \ 407 } \ 408 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 409 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 410 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 411 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 412 N = nrow2++ - 1; b->nz++; high2++; \ 413 /* shift up all the later entries in this row */ \ 414 for (ii=N; ii>=_i; ii--) { \ 415 rp2[ii+1] = rp2[ii]; \ 416 ap2[ii+1] = ap2[ii]; \ 417 } \ 418 rp2[_i] = col; \ 419 ap2[_i] = value; \ 420 b_noinsert: ; \ 421 bilen[row] = nrow2; \ 422 } 423 424 #undef __FUNCT__ 425 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 426 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 427 { 428 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 429 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 430 PetscErrorCode ierr; 431 PetscInt l,*garray = mat->garray,diag; 432 433 PetscFunctionBegin; 434 /* code only works for square matrices A */ 435 436 /* find size of row to the left of the diagonal part */ 437 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 438 row = row - diag; 439 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 440 if (garray[b->j[b->i[row]+l]] > diag) break; 441 } 442 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 443 444 /* diagonal part */ 445 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 446 447 /* right of diagonal part */ 448 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 449 PetscFunctionReturn(0); 450 } 451 452 #undef __FUNCT__ 453 #define __FUNCT__ "MatSetValues_MPIAIJ" 454 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 455 { 456 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 457 PetscScalar value; 458 PetscErrorCode ierr; 459 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 460 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 461 PetscBool roworiented = aij->roworiented; 462 463 /* Some Variables required in the macro */ 464 Mat A = aij->A; 465 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 466 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 467 MatScalar *aa = a->a; 468 PetscBool ignorezeroentries = a->ignorezeroentries; 469 Mat B = aij->B; 470 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 471 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 472 MatScalar *ba = b->a; 473 474 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 475 PetscInt nonew; 476 MatScalar *ap1,*ap2; 477 478 PetscFunctionBegin; 479 if (v) PetscValidScalarPointer(v,6); 480 for (i=0; i<m; i++) { 481 if (im[i] < 0) continue; 482 #if defined(PETSC_USE_DEBUG) 483 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 484 #endif 485 if (im[i] >= rstart && im[i] < rend) { 486 row = im[i] - rstart; 487 lastcol1 = -1; 488 rp1 = aj + ai[row]; 489 ap1 = aa + ai[row]; 490 rmax1 = aimax[row]; 491 nrow1 = ailen[row]; 492 low1 = 0; 493 high1 = nrow1; 494 lastcol2 = -1; 495 rp2 = bj + bi[row]; 496 ap2 = ba + bi[row]; 497 rmax2 = bimax[row]; 498 nrow2 = bilen[row]; 499 low2 = 0; 500 high2 = nrow2; 501 502 for (j=0; j<n; j++) { 503 if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0; 504 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 505 if (in[j] >= cstart && in[j] < cend){ 506 col = in[j] - cstart; 507 nonew = a->nonew; 508 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 509 } else if (in[j] < 0) continue; 510 #if defined(PETSC_USE_DEBUG) 511 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 512 #endif 513 else { 514 if (mat->was_assembled) { 515 if (!aij->colmap) { 516 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 517 } 518 #if defined (PETSC_USE_CTABLE) 519 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 520 col--; 521 #else 522 col = aij->colmap[in[j]] - 1; 523 #endif 524 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 525 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 526 col = in[j]; 527 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 528 B = aij->B; 529 b = (Mat_SeqAIJ*)B->data; 530 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 531 rp2 = bj + bi[row]; 532 ap2 = ba + bi[row]; 533 rmax2 = bimax[row]; 534 nrow2 = bilen[row]; 535 low2 = 0; 536 high2 = nrow2; 537 bm = aij->B->rmap->n; 538 ba = b->a; 539 } 540 } else col = in[j]; 541 nonew = b->nonew; 542 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 543 } 544 } 545 } else { 546 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 547 if (!aij->donotstash) { 548 mat->assembled = PETSC_FALSE; 549 if (roworiented) { 550 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 551 } else { 552 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 553 } 554 } 555 } 556 } 557 PetscFunctionReturn(0); 558 } 559 560 #undef __FUNCT__ 561 #define __FUNCT__ "MatGetValues_MPIAIJ" 562 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 563 { 564 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 565 PetscErrorCode ierr; 566 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 567 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 568 569 PetscFunctionBegin; 570 for (i=0; i<m; i++) { 571 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 572 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 573 if (idxm[i] >= rstart && idxm[i] < rend) { 574 row = idxm[i] - rstart; 575 for (j=0; j<n; j++) { 576 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 577 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 578 if (idxn[j] >= cstart && idxn[j] < cend){ 579 col = idxn[j] - cstart; 580 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 581 } else { 582 if (!aij->colmap) { 583 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 584 } 585 #if defined (PETSC_USE_CTABLE) 586 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 587 col --; 588 #else 589 col = aij->colmap[idxn[j]] - 1; 590 #endif 591 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 592 else { 593 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 594 } 595 } 596 } 597 } else { 598 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 599 } 600 } 601 PetscFunctionReturn(0); 602 } 603 604 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 605 606 #undef __FUNCT__ 607 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 608 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 609 { 610 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 611 PetscErrorCode ierr; 612 PetscInt nstash,reallocs; 613 InsertMode addv; 614 615 PetscFunctionBegin; 616 if (aij->donotstash || mat->nooffprocentries) { 617 PetscFunctionReturn(0); 618 } 619 620 /* make sure all processors are either in INSERTMODE or ADDMODE */ 621 ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr); 622 if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 623 mat->insertmode = addv; /* in case this processor had no cache */ 624 625 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 626 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 627 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 628 PetscFunctionReturn(0); 629 } 630 631 #undef __FUNCT__ 632 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 633 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 634 { 635 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 636 Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data; 637 PetscErrorCode ierr; 638 PetscMPIInt n; 639 PetscInt i,j,rstart,ncols,flg; 640 PetscInt *row,*col; 641 PetscBool other_disassembled; 642 PetscScalar *val; 643 InsertMode addv = mat->insertmode; 644 645 /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */ 646 PetscFunctionBegin; 647 if (!aij->donotstash && !mat->nooffprocentries) { 648 while (1) { 649 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 650 if (!flg) break; 651 652 for (i=0; i<n;) { 653 /* Now identify the consecutive vals belonging to the same row */ 654 for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 655 if (j < n) ncols = j-i; 656 else ncols = n-i; 657 /* Now assemble all these values with a single function call */ 658 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 659 i = j; 660 } 661 } 662 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 663 } 664 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 665 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 666 667 /* determine if any processor has disassembled, if so we must 668 also disassemble ourselfs, in order that we may reassemble. */ 669 /* 670 if nonzero structure of submatrix B cannot change then we know that 671 no processor disassembled thus we can skip this stuff 672 */ 673 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 674 ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr); 675 if (mat->was_assembled && !other_disassembled) { 676 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 677 } 678 } 679 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 680 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 681 } 682 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 683 ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr); 684 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 685 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 686 687 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 688 aij->rowvalues = 0; 689 690 /* used by MatAXPY() */ 691 a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */ 692 a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */ 693 694 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 695 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 696 PetscFunctionReturn(0); 697 } 698 699 #undef __FUNCT__ 700 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 701 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 702 { 703 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 704 PetscErrorCode ierr; 705 706 PetscFunctionBegin; 707 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 708 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 709 PetscFunctionReturn(0); 710 } 711 712 #undef __FUNCT__ 713 #define __FUNCT__ "MatZeroRows_MPIAIJ" 714 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 715 { 716 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 717 PetscErrorCode ierr; 718 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 719 PetscInt i,*owners = A->rmap->range; 720 PetscInt *nprocs,j,idx,nsends,row; 721 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 722 PetscInt *rvalues,count,base,slen,*source; 723 PetscInt *lens,*lrows,*values,rstart=A->rmap->rstart; 724 MPI_Comm comm = ((PetscObject)A)->comm; 725 MPI_Request *send_waits,*recv_waits; 726 MPI_Status recv_status,*send_status; 727 const PetscScalar *xx; 728 PetscScalar *bb; 729 #if defined(PETSC_DEBUG) 730 PetscBool found = PETSC_FALSE; 731 #endif 732 733 PetscFunctionBegin; 734 /* first count number of contributors to each processor */ 735 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 736 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 737 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 738 j = 0; 739 for (i=0; i<N; i++) { 740 if (lastidx > (idx = rows[i])) j = 0; 741 lastidx = idx; 742 for (; j<size; j++) { 743 if (idx >= owners[j] && idx < owners[j+1]) { 744 nprocs[2*j]++; 745 nprocs[2*j+1] = 1; 746 owner[i] = j; 747 #if defined(PETSC_DEBUG) 748 found = PETSC_TRUE; 749 #endif 750 break; 751 } 752 } 753 #if defined(PETSC_DEBUG) 754 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 755 found = PETSC_FALSE; 756 #endif 757 } 758 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 759 760 if (A->nooffproczerorows) { 761 if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row"); 762 nrecvs = nsends; 763 nmax = N; 764 } else { 765 /* inform other processors of number of messages and max length*/ 766 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 767 } 768 769 /* post receives: */ 770 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 771 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 772 for (i=0; i<nrecvs; i++) { 773 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 774 } 775 776 /* do sends: 777 1) starts[i] gives the starting index in svalues for stuff going to 778 the ith processor 779 */ 780 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 781 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 782 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 783 starts[0] = 0; 784 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 785 for (i=0; i<N; i++) { 786 svalues[starts[owner[i]]++] = rows[i]; 787 } 788 789 starts[0] = 0; 790 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 791 count = 0; 792 for (i=0; i<size; i++) { 793 if (nprocs[2*i+1]) { 794 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 795 } 796 } 797 ierr = PetscFree(starts);CHKERRQ(ierr); 798 799 base = owners[rank]; 800 801 /* wait on receives */ 802 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 803 count = nrecvs; slen = 0; 804 while (count) { 805 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 806 /* unpack receives into our local space */ 807 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 808 source[imdex] = recv_status.MPI_SOURCE; 809 lens[imdex] = n; 810 slen += n; 811 count--; 812 } 813 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 814 815 /* move the data into the send scatter */ 816 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 817 count = 0; 818 for (i=0; i<nrecvs; i++) { 819 values = rvalues + i*nmax; 820 for (j=0; j<lens[i]; j++) { 821 lrows[count++] = values[j] - base; 822 } 823 } 824 ierr = PetscFree(rvalues);CHKERRQ(ierr); 825 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 826 ierr = PetscFree(owner);CHKERRQ(ierr); 827 ierr = PetscFree(nprocs);CHKERRQ(ierr); 828 829 /* fix right hand side if needed */ 830 if (x && b) { 831 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 832 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 833 for (i=0; i<slen; i++) { 834 bb[lrows[i]] = diag*xx[lrows[i]]; 835 } 836 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 837 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 838 } 839 /* 840 Zero the required rows. If the "diagonal block" of the matrix 841 is square and the user wishes to set the diagonal we use separate 842 code so that MatSetValues() is not called for each diagonal allocating 843 new memory, thus calling lots of mallocs and slowing things down. 844 845 */ 846 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 847 ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr); 848 if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 849 ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr); 850 } else if (diag != 0.0) { 851 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 852 if (((Mat_SeqAIJ*)l->A->data)->nonew) { 853 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\ 854 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 855 } 856 for (i = 0; i < slen; i++) { 857 row = lrows[i] + rstart; 858 ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 859 } 860 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 861 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 862 } else { 863 ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr); 864 } 865 ierr = PetscFree(lrows);CHKERRQ(ierr); 866 867 /* wait on sends */ 868 if (nsends) { 869 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 870 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 871 ierr = PetscFree(send_status);CHKERRQ(ierr); 872 } 873 ierr = PetscFree(send_waits);CHKERRQ(ierr); 874 ierr = PetscFree(svalues);CHKERRQ(ierr); 875 PetscFunctionReturn(0); 876 } 877 878 #undef __FUNCT__ 879 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 880 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 881 { 882 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 883 PetscErrorCode ierr; 884 PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1; 885 PetscInt i,*owners = A->rmap->range; 886 PetscInt *nprocs,j,idx,nsends; 887 PetscInt nmax,*svalues,*starts,*owner,nrecvs; 888 PetscInt *rvalues,count,base,slen,*source; 889 PetscInt *lens,*lrows,*values,m; 890 MPI_Comm comm = ((PetscObject)A)->comm; 891 MPI_Request *send_waits,*recv_waits; 892 MPI_Status recv_status,*send_status; 893 const PetscScalar *xx; 894 PetscScalar *bb,*mask; 895 Vec xmask,lmask; 896 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 897 const PetscInt *aj, *ii,*ridx; 898 PetscScalar *aa; 899 #if defined(PETSC_DEBUG) 900 PetscBool found = PETSC_FALSE; 901 #endif 902 903 PetscFunctionBegin; 904 /* first count number of contributors to each processor */ 905 ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr); 906 ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr); 907 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/ 908 j = 0; 909 for (i=0; i<N; i++) { 910 if (lastidx > (idx = rows[i])) j = 0; 911 lastidx = idx; 912 for (; j<size; j++) { 913 if (idx >= owners[j] && idx < owners[j+1]) { 914 nprocs[2*j]++; 915 nprocs[2*j+1] = 1; 916 owner[i] = j; 917 #if defined(PETSC_DEBUG) 918 found = PETSC_TRUE; 919 #endif 920 break; 921 } 922 } 923 #if defined(PETSC_DEBUG) 924 if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range"); 925 found = PETSC_FALSE; 926 #endif 927 } 928 nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];} 929 930 /* inform other processors of number of messages and max length*/ 931 ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr); 932 933 /* post receives: */ 934 ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr); 935 ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 936 for (i=0; i<nrecvs; i++) { 937 ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 938 } 939 940 /* do sends: 941 1) starts[i] gives the starting index in svalues for stuff going to 942 the ith processor 943 */ 944 ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr); 945 ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 946 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr); 947 starts[0] = 0; 948 for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 949 for (i=0; i<N; i++) { 950 svalues[starts[owner[i]]++] = rows[i]; 951 } 952 953 starts[0] = 0; 954 for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];} 955 count = 0; 956 for (i=0; i<size; i++) { 957 if (nprocs[2*i+1]) { 958 ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 959 } 960 } 961 ierr = PetscFree(starts);CHKERRQ(ierr); 962 963 base = owners[rank]; 964 965 /* wait on receives */ 966 ierr = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr); 967 count = nrecvs; slen = 0; 968 while (count) { 969 ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 970 /* unpack receives into our local space */ 971 ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr); 972 source[imdex] = recv_status.MPI_SOURCE; 973 lens[imdex] = n; 974 slen += n; 975 count--; 976 } 977 ierr = PetscFree(recv_waits);CHKERRQ(ierr); 978 979 /* move the data into the send scatter */ 980 ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr); 981 count = 0; 982 for (i=0; i<nrecvs; i++) { 983 values = rvalues + i*nmax; 984 for (j=0; j<lens[i]; j++) { 985 lrows[count++] = values[j] - base; 986 } 987 } 988 ierr = PetscFree(rvalues);CHKERRQ(ierr); 989 ierr = PetscFree2(lens,source);CHKERRQ(ierr); 990 ierr = PetscFree(owner);CHKERRQ(ierr); 991 ierr = PetscFree(nprocs);CHKERRQ(ierr); 992 /* lrows are the local rows to be zeroed, slen is the number of local rows */ 993 994 /* zero diagonal part of matrix */ 995 ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr); 996 997 /* handle off diagonal part of matrix */ 998 ierr = MatGetVecs(A,&xmask,PETSC_NULL);CHKERRQ(ierr); 999 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1000 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1001 for (i=0; i<slen; i++) { 1002 bb[lrows[i]] = 1; 1003 } 1004 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1005 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1006 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1007 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1008 if (x) { 1009 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1010 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1011 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1012 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1013 } 1014 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1015 1016 /* remove zeroed rows of off diagonal matrix */ 1017 ii = aij->i; 1018 for (i=0; i<slen; i++) { 1019 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 1020 } 1021 1022 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1023 if (aij->compressedrow.use){ 1024 m = aij->compressedrow.nrows; 1025 ii = aij->compressedrow.i; 1026 ridx = aij->compressedrow.rindex; 1027 for (i=0; i<m; i++){ 1028 n = ii[i+1] - ii[i]; 1029 aj = aij->j + ii[i]; 1030 aa = aij->a + ii[i]; 1031 1032 for (j=0; j<n; j++) { 1033 if (PetscAbsScalar(mask[*aj])) { 1034 if (b) bb[*ridx] -= *aa*xx[*aj]; 1035 *aa = 0.0; 1036 } 1037 aa++; 1038 aj++; 1039 } 1040 ridx++; 1041 } 1042 } else { /* do not use compressed row format */ 1043 m = l->B->rmap->n; 1044 for (i=0; i<m; i++) { 1045 n = ii[i+1] - ii[i]; 1046 aj = aij->j + ii[i]; 1047 aa = aij->a + ii[i]; 1048 for (j=0; j<n; j++) { 1049 if (PetscAbsScalar(mask[*aj])) { 1050 if (b) bb[i] -= *aa*xx[*aj]; 1051 *aa = 0.0; 1052 } 1053 aa++; 1054 aj++; 1055 } 1056 } 1057 } 1058 if (x) { 1059 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1060 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1061 } 1062 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1063 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1064 ierr = PetscFree(lrows);CHKERRQ(ierr); 1065 1066 /* wait on sends */ 1067 if (nsends) { 1068 ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 1069 ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1070 ierr = PetscFree(send_status);CHKERRQ(ierr); 1071 } 1072 ierr = PetscFree(send_waits);CHKERRQ(ierr); 1073 ierr = PetscFree(svalues);CHKERRQ(ierr); 1074 1075 PetscFunctionReturn(0); 1076 } 1077 1078 #undef __FUNCT__ 1079 #define __FUNCT__ "MatMult_MPIAIJ" 1080 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1081 { 1082 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1083 PetscErrorCode ierr; 1084 PetscInt nt; 1085 1086 PetscFunctionBegin; 1087 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1088 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1089 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1090 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1091 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1092 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1093 PetscFunctionReturn(0); 1094 } 1095 1096 #undef __FUNCT__ 1097 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 1098 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1099 { 1100 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1101 PetscErrorCode ierr; 1102 1103 PetscFunctionBegin; 1104 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1105 PetscFunctionReturn(0); 1106 } 1107 1108 #undef __FUNCT__ 1109 #define __FUNCT__ "MatMultAdd_MPIAIJ" 1110 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1111 { 1112 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1113 PetscErrorCode ierr; 1114 1115 PetscFunctionBegin; 1116 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1117 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1118 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1119 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1120 PetscFunctionReturn(0); 1121 } 1122 1123 #undef __FUNCT__ 1124 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 1125 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1126 { 1127 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1128 PetscErrorCode ierr; 1129 PetscBool merged; 1130 1131 PetscFunctionBegin; 1132 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1133 /* do nondiagonal part */ 1134 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1135 if (!merged) { 1136 /* send it on its way */ 1137 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1138 /* do local part */ 1139 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1140 /* receive remote parts: note this assumes the values are not actually */ 1141 /* added in yy until the next line, */ 1142 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1143 } else { 1144 /* do local part */ 1145 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1146 /* send it on its way */ 1147 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1148 /* values actually were received in the Begin() but we need to call this nop */ 1149 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1150 } 1151 PetscFunctionReturn(0); 1152 } 1153 1154 EXTERN_C_BEGIN 1155 #undef __FUNCT__ 1156 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1157 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1158 { 1159 MPI_Comm comm; 1160 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij; 1161 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1162 IS Me,Notme; 1163 PetscErrorCode ierr; 1164 PetscInt M,N,first,last,*notme,i; 1165 PetscMPIInt size; 1166 1167 PetscFunctionBegin; 1168 1169 /* Easy test: symmetric diagonal block */ 1170 Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A; 1171 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1172 if (!*f) PetscFunctionReturn(0); 1173 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1174 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1175 if (size == 1) PetscFunctionReturn(0); 1176 1177 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */ 1178 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1179 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1180 ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),¬me);CHKERRQ(ierr); 1181 for (i=0; i<first; i++) notme[i] = i; 1182 for (i=last; i<M; i++) notme[i-last+first] = i; 1183 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1184 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1185 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1186 Aoff = Aoffs[0]; 1187 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1188 Boff = Boffs[0]; 1189 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1190 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1191 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1192 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1193 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1194 ierr = PetscFree(notme);CHKERRQ(ierr); 1195 PetscFunctionReturn(0); 1196 } 1197 EXTERN_C_END 1198 1199 #undef __FUNCT__ 1200 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1201 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1202 { 1203 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1204 PetscErrorCode ierr; 1205 1206 PetscFunctionBegin; 1207 /* do nondiagonal part */ 1208 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1209 /* send it on its way */ 1210 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1211 /* do local part */ 1212 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1213 /* receive remote parts */ 1214 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1215 PetscFunctionReturn(0); 1216 } 1217 1218 /* 1219 This only works correctly for square matrices where the subblock A->A is the 1220 diagonal block 1221 */ 1222 #undef __FUNCT__ 1223 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1224 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1225 { 1226 PetscErrorCode ierr; 1227 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1228 1229 PetscFunctionBegin; 1230 if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1231 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1232 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1233 PetscFunctionReturn(0); 1234 } 1235 1236 #undef __FUNCT__ 1237 #define __FUNCT__ "MatScale_MPIAIJ" 1238 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1239 { 1240 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1241 PetscErrorCode ierr; 1242 1243 PetscFunctionBegin; 1244 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1245 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1246 PetscFunctionReturn(0); 1247 } 1248 1249 #undef __FUNCT__ 1250 #define __FUNCT__ "MatDestroy_MPIAIJ" 1251 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1252 { 1253 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1254 PetscErrorCode ierr; 1255 1256 PetscFunctionBegin; 1257 #if defined(PETSC_USE_LOG) 1258 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1259 #endif 1260 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1261 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1262 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1263 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1264 #if defined (PETSC_USE_CTABLE) 1265 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1266 #else 1267 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1268 #endif 1269 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1270 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1271 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1272 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1273 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1274 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1275 1276 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1277 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 1278 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 1279 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr); 1280 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr); 1281 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 1282 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr); 1283 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr); 1284 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr); 1285 PetscFunctionReturn(0); 1286 } 1287 1288 #undef __FUNCT__ 1289 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1290 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1291 { 1292 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1293 Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data; 1294 Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data; 1295 PetscErrorCode ierr; 1296 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1297 int fd; 1298 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1299 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz; 1300 PetscScalar *column_values; 1301 PetscInt message_count,flowcontrolcount; 1302 FILE *file; 1303 1304 PetscFunctionBegin; 1305 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1306 ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr); 1307 nz = A->nz + B->nz; 1308 if (!rank) { 1309 header[0] = MAT_FILE_CLASSID; 1310 header[1] = mat->rmap->N; 1311 header[2] = mat->cmap->N; 1312 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1313 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1314 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1315 /* get largest number of rows any processor has */ 1316 rlen = mat->rmap->n; 1317 range = mat->rmap->range; 1318 for (i=1; i<size; i++) { 1319 rlen = PetscMax(rlen,range[i+1] - range[i]); 1320 } 1321 } else { 1322 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1323 rlen = mat->rmap->n; 1324 } 1325 1326 /* load up the local row counts */ 1327 ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr); 1328 for (i=0; i<mat->rmap->n; i++) { 1329 row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1330 } 1331 1332 /* store the row lengths to the file */ 1333 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1334 if (!rank) { 1335 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1336 for (i=1; i<size; i++) { 1337 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1338 rlen = range[i+1] - range[i]; 1339 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1340 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1341 } 1342 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1343 } else { 1344 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1345 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1346 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1347 } 1348 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1349 1350 /* load up the local column indices */ 1351 nzmax = nz; /* )th processor needs space a largest processor needs */ 1352 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr); 1353 ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr); 1354 cnt = 0; 1355 for (i=0; i<mat->rmap->n; i++) { 1356 for (j=B->i[i]; j<B->i[i+1]; j++) { 1357 if ( (col = garray[B->j[j]]) > cstart) break; 1358 column_indices[cnt++] = col; 1359 } 1360 for (k=A->i[i]; k<A->i[i+1]; k++) { 1361 column_indices[cnt++] = A->j[k] + cstart; 1362 } 1363 for (; j<B->i[i+1]; j++) { 1364 column_indices[cnt++] = garray[B->j[j]]; 1365 } 1366 } 1367 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1368 1369 /* store the column indices to the file */ 1370 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1371 if (!rank) { 1372 MPI_Status status; 1373 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1374 for (i=1; i<size; i++) { 1375 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1376 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1377 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1378 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1379 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1380 } 1381 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1382 } else { 1383 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1384 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1385 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1386 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1387 } 1388 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1389 1390 /* load up the local column values */ 1391 ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr); 1392 cnt = 0; 1393 for (i=0; i<mat->rmap->n; i++) { 1394 for (j=B->i[i]; j<B->i[i+1]; j++) { 1395 if ( garray[B->j[j]] > cstart) break; 1396 column_values[cnt++] = B->a[j]; 1397 } 1398 for (k=A->i[i]; k<A->i[i+1]; k++) { 1399 column_values[cnt++] = A->a[k]; 1400 } 1401 for (; j<B->i[i+1]; j++) { 1402 column_values[cnt++] = B->a[j]; 1403 } 1404 } 1405 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1406 1407 /* store the column values to the file */ 1408 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1409 if (!rank) { 1410 MPI_Status status; 1411 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1412 for (i=1; i<size; i++) { 1413 ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr); 1414 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr); 1415 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1416 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1417 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1418 } 1419 ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr); 1420 } else { 1421 ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr); 1422 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1423 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr); 1424 ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr); 1425 } 1426 ierr = PetscFree(column_values);CHKERRQ(ierr); 1427 1428 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1429 if (file) { 1430 fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1431 } 1432 1433 PetscFunctionReturn(0); 1434 } 1435 1436 #undef __FUNCT__ 1437 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1438 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1439 { 1440 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1441 PetscErrorCode ierr; 1442 PetscMPIInt rank = aij->rank,size = aij->size; 1443 PetscBool isdraw,iascii,isbinary; 1444 PetscViewer sviewer; 1445 PetscViewerFormat format; 1446 1447 PetscFunctionBegin; 1448 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1449 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1450 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1451 if (iascii) { 1452 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1453 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1454 MatInfo info; 1455 PetscBool inodes; 1456 1457 ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr); 1458 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1459 ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr); 1460 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 1461 if (!inodes) { 1462 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1463 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1464 } else { 1465 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1466 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1467 } 1468 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1469 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1470 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1471 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1472 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1473 ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 1474 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1475 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1476 PetscFunctionReturn(0); 1477 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1478 PetscInt inodecount,inodelimit,*inodes; 1479 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1480 if (inodes) { 1481 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1482 } else { 1483 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1484 } 1485 PetscFunctionReturn(0); 1486 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1487 PetscFunctionReturn(0); 1488 } 1489 } else if (isbinary) { 1490 if (size == 1) { 1491 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1492 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1493 } else { 1494 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1495 } 1496 PetscFunctionReturn(0); 1497 } else if (isdraw) { 1498 PetscDraw draw; 1499 PetscBool isnull; 1500 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1501 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1502 } 1503 1504 if (size == 1) { 1505 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1506 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1507 } else { 1508 /* assemble the entire matrix onto first processor. */ 1509 Mat A; 1510 Mat_SeqAIJ *Aloc; 1511 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1512 MatScalar *a; 1513 1514 if (mat->rmap->N > 1024) { 1515 PetscBool flg = PETSC_FALSE; 1516 1517 ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr); 1518 if (!flg) { 1519 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large."); 1520 } 1521 } 1522 1523 ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr); 1524 if (!rank) { 1525 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1526 } else { 1527 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1528 } 1529 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1530 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1531 ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr); 1532 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1533 ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr); 1534 1535 /* copy over the A part */ 1536 Aloc = (Mat_SeqAIJ*)aij->A->data; 1537 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1538 row = mat->rmap->rstart; 1539 for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;} 1540 for (i=0; i<m; i++) { 1541 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1542 row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1543 } 1544 aj = Aloc->j; 1545 for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;} 1546 1547 /* copy over the B part */ 1548 Aloc = (Mat_SeqAIJ*)aij->B->data; 1549 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1550 row = mat->rmap->rstart; 1551 ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr); 1552 ct = cols; 1553 for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];} 1554 for (i=0; i<m; i++) { 1555 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1556 row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1557 } 1558 ierr = PetscFree(ct);CHKERRQ(ierr); 1559 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1560 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1561 /* 1562 Everyone has to call to draw the matrix since the graphics waits are 1563 synchronized across all processors that share the PetscDraw object 1564 */ 1565 ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1566 if (!rank) { 1567 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1568 /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/ 1569 PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ); 1570 ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1571 } 1572 ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1573 ierr = MatDestroy(&A);CHKERRQ(ierr); 1574 } 1575 PetscFunctionReturn(0); 1576 } 1577 1578 #undef __FUNCT__ 1579 #define __FUNCT__ "MatView_MPIAIJ" 1580 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1581 { 1582 PetscErrorCode ierr; 1583 PetscBool iascii,isdraw,issocket,isbinary; 1584 1585 PetscFunctionBegin; 1586 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1587 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1588 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1589 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1590 if (iascii || isdraw || isbinary || issocket) { 1591 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1592 } else { 1593 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name); 1594 } 1595 PetscFunctionReturn(0); 1596 } 1597 1598 #undef __FUNCT__ 1599 #define __FUNCT__ "MatSOR_MPIAIJ" 1600 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1601 { 1602 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1603 PetscErrorCode ierr; 1604 Vec bb1 = 0; 1605 PetscBool hasop; 1606 1607 PetscFunctionBegin; 1608 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1609 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1610 } 1611 1612 if (flag == SOR_APPLY_UPPER) { 1613 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1614 PetscFunctionReturn(0); 1615 } 1616 1617 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){ 1618 if (flag & SOR_ZERO_INITIAL_GUESS) { 1619 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1620 its--; 1621 } 1622 1623 while (its--) { 1624 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1625 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1626 1627 /* update rhs: bb1 = bb - B*x */ 1628 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1629 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1630 1631 /* local sweep */ 1632 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1633 } 1634 } else if (flag & SOR_LOCAL_FORWARD_SWEEP){ 1635 if (flag & SOR_ZERO_INITIAL_GUESS) { 1636 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1637 its--; 1638 } 1639 while (its--) { 1640 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1641 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1642 1643 /* update rhs: bb1 = bb - B*x */ 1644 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1645 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1646 1647 /* local sweep */ 1648 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1649 } 1650 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){ 1651 if (flag & SOR_ZERO_INITIAL_GUESS) { 1652 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1653 its--; 1654 } 1655 while (its--) { 1656 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1657 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1658 1659 /* update rhs: bb1 = bb - B*x */ 1660 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1661 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1662 1663 /* local sweep */ 1664 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1665 } 1666 } else if (flag & SOR_EISENSTAT) { 1667 Vec xx1; 1668 1669 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1670 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1671 1672 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1673 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1674 if (!mat->diag) { 1675 ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr); 1676 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1677 } 1678 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1679 if (hasop) { 1680 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1681 } else { 1682 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1683 } 1684 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1685 1686 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1687 1688 /* local sweep */ 1689 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1690 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1691 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1692 } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported"); 1693 1694 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1695 PetscFunctionReturn(0); 1696 } 1697 1698 #undef __FUNCT__ 1699 #define __FUNCT__ "MatPermute_MPIAIJ" 1700 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1701 { 1702 MPI_Comm comm; 1703 PetscInt first,local_rowsize,local_colsize; 1704 const PetscInt *rows; 1705 IS crowp,growp,irowp,lrowp,lcolp,icolp; 1706 PetscErrorCode ierr; 1707 1708 PetscFunctionBegin; 1709 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1710 /* make a collective version of 'rowp', this is to be tolerant of users who pass serial index sets */ 1711 ierr = ISOnComm(rowp,comm,PETSC_USE_POINTER,&crowp);CHKERRQ(ierr); 1712 /* collect the global row permutation and invert it */ 1713 ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr); 1714 ierr = ISSetPermutation(growp);CHKERRQ(ierr); 1715 ierr = ISDestroy(&crowp);CHKERRQ(ierr); 1716 ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr); 1717 ierr = ISDestroy(&growp);CHKERRQ(ierr); 1718 /* get the local target indices */ 1719 ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr); 1720 ierr = MatGetLocalSize(A,&local_rowsize,&local_colsize);CHKERRQ(ierr); 1721 ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr); 1722 ierr = ISCreateGeneral(PETSC_COMM_SELF,local_rowsize,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr); 1723 ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr); 1724 ierr = ISDestroy(&irowp);CHKERRQ(ierr); 1725 /* the column permutation is so much easier; 1726 make a local version of 'colp' and invert it */ 1727 ierr = ISOnComm(colp,PETSC_COMM_SELF,PETSC_USE_POINTER,&lcolp);CHKERRQ(ierr); 1728 ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 1729 ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr); 1730 ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 1731 /* now we just get the submatrix */ 1732 ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_colsize,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1733 /* clean up */ 1734 ierr = ISDestroy(&lrowp);CHKERRQ(ierr); 1735 ierr = ISDestroy(&icolp);CHKERRQ(ierr); 1736 PetscFunctionReturn(0); 1737 } 1738 1739 #undef __FUNCT__ 1740 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1741 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1742 { 1743 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1744 Mat A = mat->A,B = mat->B; 1745 PetscErrorCode ierr; 1746 PetscReal isend[5],irecv[5]; 1747 1748 PetscFunctionBegin; 1749 info->block_size = 1.0; 1750 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1751 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1752 isend[3] = info->memory; isend[4] = info->mallocs; 1753 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1754 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1755 isend[3] += info->memory; isend[4] += info->mallocs; 1756 if (flag == MAT_LOCAL) { 1757 info->nz_used = isend[0]; 1758 info->nz_allocated = isend[1]; 1759 info->nz_unneeded = isend[2]; 1760 info->memory = isend[3]; 1761 info->mallocs = isend[4]; 1762 } else if (flag == MAT_GLOBAL_MAX) { 1763 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr); 1764 info->nz_used = irecv[0]; 1765 info->nz_allocated = irecv[1]; 1766 info->nz_unneeded = irecv[2]; 1767 info->memory = irecv[3]; 1768 info->mallocs = irecv[4]; 1769 } else if (flag == MAT_GLOBAL_SUM) { 1770 ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr); 1771 info->nz_used = irecv[0]; 1772 info->nz_allocated = irecv[1]; 1773 info->nz_unneeded = irecv[2]; 1774 info->memory = irecv[3]; 1775 info->mallocs = irecv[4]; 1776 } 1777 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1778 info->fill_ratio_needed = 0; 1779 info->factor_mallocs = 0; 1780 1781 PetscFunctionReturn(0); 1782 } 1783 1784 #undef __FUNCT__ 1785 #define __FUNCT__ "MatSetOption_MPIAIJ" 1786 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1787 { 1788 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1789 PetscErrorCode ierr; 1790 1791 PetscFunctionBegin; 1792 switch (op) { 1793 case MAT_NEW_NONZERO_LOCATIONS: 1794 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1795 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1796 case MAT_KEEP_NONZERO_PATTERN: 1797 case MAT_NEW_NONZERO_LOCATION_ERR: 1798 case MAT_USE_INODES: 1799 case MAT_IGNORE_ZERO_ENTRIES: 1800 MatCheckPreallocated(A,1); 1801 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1802 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1803 break; 1804 case MAT_ROW_ORIENTED: 1805 a->roworiented = flg; 1806 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1807 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1808 break; 1809 case MAT_NEW_DIAGONALS: 1810 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1811 break; 1812 case MAT_IGNORE_OFF_PROC_ENTRIES: 1813 a->donotstash = flg; 1814 break; 1815 case MAT_SPD: 1816 A->spd_set = PETSC_TRUE; 1817 A->spd = flg; 1818 if (flg) { 1819 A->symmetric = PETSC_TRUE; 1820 A->structurally_symmetric = PETSC_TRUE; 1821 A->symmetric_set = PETSC_TRUE; 1822 A->structurally_symmetric_set = PETSC_TRUE; 1823 } 1824 break; 1825 case MAT_SYMMETRIC: 1826 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1827 break; 1828 case MAT_STRUCTURALLY_SYMMETRIC: 1829 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1830 break; 1831 case MAT_HERMITIAN: 1832 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1833 break; 1834 case MAT_SYMMETRY_ETERNAL: 1835 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1836 break; 1837 #if defined(PETSC_HAVE_TXPETSCGPU) 1838 // Sorry, I don't know how else to add MatOptions for data specific 1839 // to GPU classes without protecting against it in this case statement. 1840 // If anyone knows of a better way, let me know. Paul 1841 case MAT_DIAGBLOCK_CSR: 1842 case MAT_OFFDIAGBLOCK_CSR: 1843 case MAT_CSR: 1844 case MAT_DIAGBLOCK_DIA: 1845 case MAT_OFFDIAGBLOCK_DIA: 1846 case MAT_DIA: 1847 case MAT_DIAGBLOCK_ELL: 1848 case MAT_OFFDIAGBLOCK_ELL: 1849 case MAT_ELL: 1850 case MAT_DIAGBLOCK_HYB: 1851 case MAT_OFFDIAGBLOCK_HYB: 1852 case MAT_HYB: 1853 /* Not an error because MatSetOption_MPIAIJCUSP/CUSPARSE handle these options */ 1854 break; 1855 #endif 1856 default: 1857 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1858 } 1859 PetscFunctionReturn(0); 1860 } 1861 1862 #undef __FUNCT__ 1863 #define __FUNCT__ "MatGetRow_MPIAIJ" 1864 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1865 { 1866 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1867 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1868 PetscErrorCode ierr; 1869 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1870 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1871 PetscInt *cmap,*idx_p; 1872 1873 PetscFunctionBegin; 1874 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1875 mat->getrowactive = PETSC_TRUE; 1876 1877 if (!mat->rowvalues && (idx || v)) { 1878 /* 1879 allocate enough space to hold information from the longest row. 1880 */ 1881 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1882 PetscInt max = 1,tmp; 1883 for (i=0; i<matin->rmap->n; i++) { 1884 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1885 if (max < tmp) { max = tmp; } 1886 } 1887 ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr); 1888 } 1889 1890 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1891 lrow = row - rstart; 1892 1893 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1894 if (!v) {pvA = 0; pvB = 0;} 1895 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1896 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1897 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1898 nztot = nzA + nzB; 1899 1900 cmap = mat->garray; 1901 if (v || idx) { 1902 if (nztot) { 1903 /* Sort by increasing column numbers, assuming A and B already sorted */ 1904 PetscInt imark = -1; 1905 if (v) { 1906 *v = v_p = mat->rowvalues; 1907 for (i=0; i<nzB; i++) { 1908 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1909 else break; 1910 } 1911 imark = i; 1912 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1913 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1914 } 1915 if (idx) { 1916 *idx = idx_p = mat->rowindices; 1917 if (imark > -1) { 1918 for (i=0; i<imark; i++) { 1919 idx_p[i] = cmap[cworkB[i]]; 1920 } 1921 } else { 1922 for (i=0; i<nzB; i++) { 1923 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1924 else break; 1925 } 1926 imark = i; 1927 } 1928 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1929 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1930 } 1931 } else { 1932 if (idx) *idx = 0; 1933 if (v) *v = 0; 1934 } 1935 } 1936 *nz = nztot; 1937 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1938 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1939 PetscFunctionReturn(0); 1940 } 1941 1942 #undef __FUNCT__ 1943 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1944 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1945 { 1946 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1947 1948 PetscFunctionBegin; 1949 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1950 aij->getrowactive = PETSC_FALSE; 1951 PetscFunctionReturn(0); 1952 } 1953 1954 #undef __FUNCT__ 1955 #define __FUNCT__ "MatNorm_MPIAIJ" 1956 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1957 { 1958 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1959 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1960 PetscErrorCode ierr; 1961 PetscInt i,j,cstart = mat->cmap->rstart; 1962 PetscReal sum = 0.0; 1963 MatScalar *v; 1964 1965 PetscFunctionBegin; 1966 if (aij->size == 1) { 1967 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1968 } else { 1969 if (type == NORM_FROBENIUS) { 1970 v = amat->a; 1971 for (i=0; i<amat->nz; i++) { 1972 #if defined(PETSC_USE_COMPLEX) 1973 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1974 #else 1975 sum += (*v)*(*v); v++; 1976 #endif 1977 } 1978 v = bmat->a; 1979 for (i=0; i<bmat->nz; i++) { 1980 #if defined(PETSC_USE_COMPLEX) 1981 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1982 #else 1983 sum += (*v)*(*v); v++; 1984 #endif 1985 } 1986 ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 1987 *norm = PetscSqrtReal(*norm); 1988 } else if (type == NORM_1) { /* max column norm */ 1989 PetscReal *tmp,*tmp2; 1990 PetscInt *jj,*garray = aij->garray; 1991 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr); 1992 ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr); 1993 ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 1994 *norm = 0.0; 1995 v = amat->a; jj = amat->j; 1996 for (j=0; j<amat->nz; j++) { 1997 tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++; 1998 } 1999 v = bmat->a; jj = bmat->j; 2000 for (j=0; j<bmat->nz; j++) { 2001 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 2002 } 2003 ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr); 2004 for (j=0; j<mat->cmap->N; j++) { 2005 if (tmp2[j] > *norm) *norm = tmp2[j]; 2006 } 2007 ierr = PetscFree(tmp);CHKERRQ(ierr); 2008 ierr = PetscFree(tmp2);CHKERRQ(ierr); 2009 } else if (type == NORM_INFINITY) { /* max row norm */ 2010 PetscReal ntemp = 0.0; 2011 for (j=0; j<aij->A->rmap->n; j++) { 2012 v = amat->a + amat->i[j]; 2013 sum = 0.0; 2014 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2015 sum += PetscAbsScalar(*v); v++; 2016 } 2017 v = bmat->a + bmat->i[j]; 2018 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2019 sum += PetscAbsScalar(*v); v++; 2020 } 2021 if (sum > ntemp) ntemp = sum; 2022 } 2023 ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr); 2024 } else { 2025 SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm"); 2026 } 2027 } 2028 PetscFunctionReturn(0); 2029 } 2030 2031 #undef __FUNCT__ 2032 #define __FUNCT__ "MatTranspose_MPIAIJ" 2033 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2034 { 2035 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2036 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 2037 PetscErrorCode ierr; 2038 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz; 2039 PetscInt cstart=A->cmap->rstart,ncol; 2040 Mat B; 2041 MatScalar *array; 2042 2043 PetscFunctionBegin; 2044 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 2045 2046 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; 2047 ai = Aloc->i; aj = Aloc->j; 2048 bi = Bloc->i; bj = Bloc->j; 2049 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2050 /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */ 2051 ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr); 2052 ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr); 2053 for (i=0; i<ai[ma]; i++){ 2054 d_nnz[aj[i]] ++; 2055 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2056 } 2057 2058 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 2059 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2060 ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs); CHKERRQ(ierr); 2061 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2062 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr); 2063 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2064 ierr = PetscFree(d_nnz);CHKERRQ(ierr); 2065 } else { 2066 B = *matout; 2067 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2068 for (i=0; i<ai[ma]; i++){ 2069 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 2070 } 2071 } 2072 2073 /* copy over the A part */ 2074 array = Aloc->a; 2075 row = A->rmap->rstart; 2076 for (i=0; i<ma; i++) { 2077 ncol = ai[i+1]-ai[i]; 2078 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2079 row++; array += ncol; aj += ncol; 2080 } 2081 aj = Aloc->j; 2082 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2083 2084 /* copy over the B part */ 2085 ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr); 2086 ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr); 2087 array = Bloc->a; 2088 row = A->rmap->rstart; 2089 for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];} 2090 cols_tmp = cols; 2091 for (i=0; i<mb; i++) { 2092 ncol = bi[i+1]-bi[i]; 2093 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2094 row++; array += ncol; cols_tmp += ncol; 2095 } 2096 ierr = PetscFree(cols);CHKERRQ(ierr); 2097 2098 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2099 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2100 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2101 *matout = B; 2102 } else { 2103 ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 2104 } 2105 PetscFunctionReturn(0); 2106 } 2107 2108 #undef __FUNCT__ 2109 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2110 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2111 { 2112 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2113 Mat a = aij->A,b = aij->B; 2114 PetscErrorCode ierr; 2115 PetscInt s1,s2,s3; 2116 2117 PetscFunctionBegin; 2118 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2119 if (rr) { 2120 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2121 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2122 /* Overlap communication with computation. */ 2123 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2124 } 2125 if (ll) { 2126 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2127 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2128 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2129 } 2130 /* scale the diagonal block */ 2131 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2132 2133 if (rr) { 2134 /* Do a scatter end and then right scale the off-diagonal block */ 2135 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2136 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2137 } 2138 2139 PetscFunctionReturn(0); 2140 } 2141 2142 #undef __FUNCT__ 2143 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2144 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2145 { 2146 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2147 PetscErrorCode ierr; 2148 2149 PetscFunctionBegin; 2150 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2151 PetscFunctionReturn(0); 2152 } 2153 2154 #undef __FUNCT__ 2155 #define __FUNCT__ "MatEqual_MPIAIJ" 2156 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2157 { 2158 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2159 Mat a,b,c,d; 2160 PetscBool flg; 2161 PetscErrorCode ierr; 2162 2163 PetscFunctionBegin; 2164 a = matA->A; b = matA->B; 2165 c = matB->A; d = matB->B; 2166 2167 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2168 if (flg) { 2169 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2170 } 2171 ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); 2172 PetscFunctionReturn(0); 2173 } 2174 2175 #undef __FUNCT__ 2176 #define __FUNCT__ "MatCopy_MPIAIJ" 2177 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2178 { 2179 PetscErrorCode ierr; 2180 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2181 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2182 2183 PetscFunctionBegin; 2184 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2185 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2186 /* because of the column compression in the off-processor part of the matrix a->B, 2187 the number of columns in a->B and b->B may be different, hence we cannot call 2188 the MatCopy() directly on the two parts. If need be, we can provide a more 2189 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2190 then copying the submatrices */ 2191 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2192 } else { 2193 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2194 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2195 } 2196 PetscFunctionReturn(0); 2197 } 2198 2199 #undef __FUNCT__ 2200 #define __FUNCT__ "MatSetUp_MPIAIJ" 2201 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2202 { 2203 PetscErrorCode ierr; 2204 2205 PetscFunctionBegin; 2206 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2207 PetscFunctionReturn(0); 2208 } 2209 2210 #undef __FUNCT__ 2211 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2212 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2213 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt* nnz) 2214 { 2215 PetscInt i,m=Y->rmap->N; 2216 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2217 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2218 const PetscInt *xi = x->i,*yi = y->i; 2219 2220 PetscFunctionBegin; 2221 /* Set the number of nonzeros in the new matrix */ 2222 for(i=0; i<m; i++) { 2223 PetscInt j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i]; 2224 const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i]; 2225 nnz[i] = 0; 2226 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2227 for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */ 2228 if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++; /* Skip duplicate */ 2229 nnz[i]++; 2230 } 2231 for (; k<nzy; k++) nnz[i]++; 2232 } 2233 PetscFunctionReturn(0); 2234 } 2235 2236 #undef __FUNCT__ 2237 #define __FUNCT__ "MatAXPY_MPIAIJ" 2238 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2239 { 2240 PetscErrorCode ierr; 2241 PetscInt i; 2242 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data; 2243 PetscBLASInt bnz,one=1; 2244 Mat_SeqAIJ *x,*y; 2245 2246 PetscFunctionBegin; 2247 if (str == SAME_NONZERO_PATTERN) { 2248 PetscScalar alpha = a; 2249 x = (Mat_SeqAIJ *)xx->A->data; 2250 y = (Mat_SeqAIJ *)yy->A->data; 2251 bnz = PetscBLASIntCast(x->nz); 2252 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2253 x = (Mat_SeqAIJ *)xx->B->data; 2254 y = (Mat_SeqAIJ *)yy->B->data; 2255 bnz = PetscBLASIntCast(x->nz); 2256 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 2257 } else if (str == SUBSET_NONZERO_PATTERN) { 2258 ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr); 2259 2260 x = (Mat_SeqAIJ *)xx->B->data; 2261 y = (Mat_SeqAIJ *)yy->B->data; 2262 if (y->xtoy && y->XtoY != xx->B) { 2263 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 2264 ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr); 2265 } 2266 if (!y->xtoy) { /* get xtoy */ 2267 ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr); 2268 y->XtoY = xx->B; 2269 ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr); 2270 } 2271 for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]); 2272 } else { 2273 Mat B; 2274 PetscInt *nnz_d,*nnz_o; 2275 ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr); 2276 ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr); 2277 ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr); 2278 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2279 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2280 ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr); 2281 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2282 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2283 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2284 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2285 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2286 ierr = MatHeaderReplace(Y,B); 2287 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2288 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2289 } 2290 PetscFunctionReturn(0); 2291 } 2292 2293 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2294 2295 #undef __FUNCT__ 2296 #define __FUNCT__ "MatConjugate_MPIAIJ" 2297 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2298 { 2299 #if defined(PETSC_USE_COMPLEX) 2300 PetscErrorCode ierr; 2301 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2302 2303 PetscFunctionBegin; 2304 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2305 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2306 #else 2307 PetscFunctionBegin; 2308 #endif 2309 PetscFunctionReturn(0); 2310 } 2311 2312 #undef __FUNCT__ 2313 #define __FUNCT__ "MatRealPart_MPIAIJ" 2314 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2315 { 2316 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2317 PetscErrorCode ierr; 2318 2319 PetscFunctionBegin; 2320 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2321 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2322 PetscFunctionReturn(0); 2323 } 2324 2325 #undef __FUNCT__ 2326 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2327 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2328 { 2329 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2330 PetscErrorCode ierr; 2331 2332 PetscFunctionBegin; 2333 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2334 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2335 PetscFunctionReturn(0); 2336 } 2337 2338 #ifdef PETSC_HAVE_PBGL 2339 2340 #include <boost/parallel/mpi/bsp_process_group.hpp> 2341 #include <boost/graph/distributed/ilu_default_graph.hpp> 2342 #include <boost/graph/distributed/ilu_0_block.hpp> 2343 #include <boost/graph/distributed/ilu_preconditioner.hpp> 2344 #include <boost/graph/distributed/petsc/interface.hpp> 2345 #include <boost/multi_array.hpp> 2346 #include <boost/parallel/distributed_property_map->hpp> 2347 2348 #undef __FUNCT__ 2349 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ" 2350 /* 2351 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2352 */ 2353 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 2354 { 2355 namespace petsc = boost::distributed::petsc; 2356 2357 namespace graph_dist = boost::graph::distributed; 2358 using boost::graph::distributed::ilu_default::process_group_type; 2359 using boost::graph::ilu_permuted; 2360 2361 PetscBool row_identity, col_identity; 2362 PetscContainer c; 2363 PetscInt m, n, M, N; 2364 PetscErrorCode ierr; 2365 2366 PetscFunctionBegin; 2367 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu"); 2368 ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr); 2369 ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr); 2370 if (!row_identity || !col_identity) { 2371 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU"); 2372 } 2373 2374 process_group_type pg; 2375 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2376 lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg)); 2377 lgraph_type& level_graph = *lgraph_p; 2378 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2379 2380 petsc::read_matrix(A, graph, get(boost::edge_weight, graph)); 2381 ilu_permuted(level_graph); 2382 2383 /* put together the new matrix */ 2384 ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr); 2385 ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr); 2386 ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr); 2387 ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr); 2388 ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs); CHKERRQ(ierr); 2389 ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr); 2390 ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2391 ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2392 2393 ierr = PetscContainerCreate(((PetscObject)A)->comm, &c); 2394 ierr = PetscContainerSetPointer(c, lgraph_p); 2395 ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c); 2396 ierr = PetscContainerDestroy(&c); 2397 PetscFunctionReturn(0); 2398 } 2399 2400 #undef __FUNCT__ 2401 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ" 2402 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info) 2403 { 2404 PetscFunctionBegin; 2405 PetscFunctionReturn(0); 2406 } 2407 2408 #undef __FUNCT__ 2409 #define __FUNCT__ "MatSolve_MPIAIJ" 2410 /* 2411 This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu> 2412 */ 2413 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x) 2414 { 2415 namespace graph_dist = boost::graph::distributed; 2416 2417 typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type; 2418 lgraph_type* lgraph_p; 2419 PetscContainer c; 2420 PetscErrorCode ierr; 2421 2422 PetscFunctionBegin; 2423 ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr); 2424 ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr); 2425 ierr = VecCopy(b, x);CHKERRQ(ierr); 2426 2427 PetscScalar* array_x; 2428 ierr = VecGetArray(x, &array_x);CHKERRQ(ierr); 2429 PetscInt sx; 2430 ierr = VecGetSize(x, &sx);CHKERRQ(ierr); 2431 2432 PetscScalar* array_b; 2433 ierr = VecGetArray(b, &array_b);CHKERRQ(ierr); 2434 PetscInt sb; 2435 ierr = VecGetSize(b, &sb);CHKERRQ(ierr); 2436 2437 lgraph_type& level_graph = *lgraph_p; 2438 graph_dist::ilu_default::graph_type& graph(level_graph.graph); 2439 2440 typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type; 2441 array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]), 2442 ref_x(array_x, boost::extents[num_vertices(graph)]); 2443 2444 typedef boost::iterator_property_map<array_ref_type::iterator, 2445 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type; 2446 gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)), 2447 vector_x(ref_x.begin(), get(boost::vertex_index, graph)); 2448 2449 ilu_set_solve(*lgraph_p, vector_b, vector_x); 2450 2451 PetscFunctionReturn(0); 2452 } 2453 #endif 2454 2455 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */ 2456 PetscInt nzlocal,nsends,nrecvs; 2457 PetscMPIInt *send_rank,*recv_rank; 2458 PetscInt *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j; 2459 PetscScalar *sbuf_a,**rbuf_a; 2460 PetscErrorCode (*Destroy)(Mat); 2461 } Mat_Redundant; 2462 2463 #undef __FUNCT__ 2464 #define __FUNCT__ "PetscContainerDestroy_MatRedundant" 2465 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr) 2466 { 2467 PetscErrorCode ierr; 2468 Mat_Redundant *redund=(Mat_Redundant*)ptr; 2469 PetscInt i; 2470 2471 PetscFunctionBegin; 2472 ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr); 2473 ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr); 2474 ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr); 2475 for (i=0; i<redund->nrecvs; i++){ 2476 ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr); 2477 ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr); 2478 } 2479 ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr); 2480 ierr = PetscFree(redund);CHKERRQ(ierr); 2481 PetscFunctionReturn(0); 2482 } 2483 2484 #undef __FUNCT__ 2485 #define __FUNCT__ "MatDestroy_MatRedundant" 2486 PetscErrorCode MatDestroy_MatRedundant(Mat A) 2487 { 2488 PetscErrorCode ierr; 2489 PetscContainer container; 2490 Mat_Redundant *redund=PETSC_NULL; 2491 2492 PetscFunctionBegin; 2493 ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2494 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2495 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2496 A->ops->destroy = redund->Destroy; 2497 ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr); 2498 if (A->ops->destroy) { 2499 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 2500 } 2501 PetscFunctionReturn(0); 2502 } 2503 2504 #undef __FUNCT__ 2505 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ" 2506 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant) 2507 { 2508 PetscMPIInt rank,size; 2509 MPI_Comm comm=((PetscObject)mat)->comm; 2510 PetscErrorCode ierr; 2511 PetscInt nsends=0,nrecvs=0,i,rownz_max=0; 2512 PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL; 2513 PetscInt *rowrange=mat->rmap->range; 2514 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2515 Mat A=aij->A,B=aij->B,C=*matredundant; 2516 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 2517 PetscScalar *sbuf_a; 2518 PetscInt nzlocal=a->nz+b->nz; 2519 PetscInt j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB; 2520 PetscInt rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N; 2521 PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j; 2522 MatScalar *aworkA,*aworkB; 2523 PetscScalar *vals; 2524 PetscMPIInt tag1,tag2,tag3,imdex; 2525 MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL, 2526 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL; 2527 MPI_Status recv_status,*send_status; 2528 PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count; 2529 PetscInt **rbuf_j=PETSC_NULL; 2530 PetscScalar **rbuf_a=PETSC_NULL; 2531 Mat_Redundant *redund=PETSC_NULL; 2532 PetscContainer container; 2533 2534 PetscFunctionBegin; 2535 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2536 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2537 2538 if (reuse == MAT_REUSE_MATRIX) { 2539 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2540 if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size"); 2541 ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr); 2542 if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size"); 2543 ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr); 2544 if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit"); 2545 ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr); 2546 if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal"); 2547 2548 nsends = redund->nsends; 2549 nrecvs = redund->nrecvs; 2550 send_rank = redund->send_rank; 2551 recv_rank = redund->recv_rank; 2552 sbuf_nz = redund->sbuf_nz; 2553 rbuf_nz = redund->rbuf_nz; 2554 sbuf_j = redund->sbuf_j; 2555 sbuf_a = redund->sbuf_a; 2556 rbuf_j = redund->rbuf_j; 2557 rbuf_a = redund->rbuf_a; 2558 } 2559 2560 if (reuse == MAT_INITIAL_MATRIX){ 2561 PetscMPIInt subrank,subsize; 2562 PetscInt nleftover,np_subcomm; 2563 /* get the destination processors' id send_rank, nsends and nrecvs */ 2564 ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr); 2565 ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr); 2566 ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank); 2567 np_subcomm = size/nsubcomm; 2568 nleftover = size - nsubcomm*np_subcomm; 2569 nsends = 0; nrecvs = 0; 2570 for (i=0; i<size; i++){ /* i=rank*/ 2571 if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */ 2572 send_rank[nsends] = i; nsends++; 2573 recv_rank[nrecvs++] = i; 2574 } 2575 } 2576 if (rank >= size - nleftover){/* this proc is a leftover processor */ 2577 i = size-nleftover-1; 2578 j = 0; 2579 while (j < nsubcomm - nleftover){ 2580 send_rank[nsends++] = i; 2581 i--; j++; 2582 } 2583 } 2584 2585 if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */ 2586 for (i=0; i<nleftover; i++){ 2587 recv_rank[nrecvs++] = size-nleftover+i; 2588 } 2589 } 2590 2591 /* allocate sbuf_j, sbuf_a */ 2592 i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2; 2593 ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr); 2594 ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr); 2595 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2596 2597 /* copy mat's local entries into the buffers */ 2598 if (reuse == MAT_INITIAL_MATRIX){ 2599 rownz_max = 0; 2600 rptr = sbuf_j; 2601 cols = sbuf_j + rend-rstart + 1; 2602 vals = sbuf_a; 2603 rptr[0] = 0; 2604 for (i=0; i<rend-rstart; i++){ 2605 row = i + rstart; 2606 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2607 ncols = nzA + nzB; 2608 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2609 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2610 /* load the column indices for this row into cols */ 2611 lwrite = 0; 2612 for (l=0; l<nzB; l++) { 2613 if ((ctmp = bmap[cworkB[l]]) < cstart){ 2614 vals[lwrite] = aworkB[l]; 2615 cols[lwrite++] = ctmp; 2616 } 2617 } 2618 for (l=0; l<nzA; l++){ 2619 vals[lwrite] = aworkA[l]; 2620 cols[lwrite++] = cstart + cworkA[l]; 2621 } 2622 for (l=0; l<nzB; l++) { 2623 if ((ctmp = bmap[cworkB[l]]) >= cend){ 2624 vals[lwrite] = aworkB[l]; 2625 cols[lwrite++] = ctmp; 2626 } 2627 } 2628 vals += ncols; 2629 cols += ncols; 2630 rptr[i+1] = rptr[i] + ncols; 2631 if (rownz_max < ncols) rownz_max = ncols; 2632 } 2633 if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz); 2634 } else { /* only copy matrix values into sbuf_a */ 2635 rptr = sbuf_j; 2636 vals = sbuf_a; 2637 rptr[0] = 0; 2638 for (i=0; i<rend-rstart; i++){ 2639 row = i + rstart; 2640 nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i]; 2641 ncols = nzA + nzB; 2642 cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i]; 2643 aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i]; 2644 lwrite = 0; 2645 for (l=0; l<nzB; l++) { 2646 if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l]; 2647 } 2648 for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l]; 2649 for (l=0; l<nzB; l++) { 2650 if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l]; 2651 } 2652 vals += ncols; 2653 rptr[i+1] = rptr[i] + ncols; 2654 } 2655 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2656 2657 /* send nzlocal to others, and recv other's nzlocal */ 2658 /*--------------------------------------------------*/ 2659 if (reuse == MAT_INITIAL_MATRIX){ 2660 ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2661 s_waits2 = s_waits3 + nsends; 2662 s_waits1 = s_waits2 + nsends; 2663 r_waits1 = s_waits1 + nsends; 2664 r_waits2 = r_waits1 + nrecvs; 2665 r_waits3 = r_waits2 + nrecvs; 2666 } else { 2667 ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr); 2668 r_waits3 = s_waits3 + nsends; 2669 } 2670 2671 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr); 2672 if (reuse == MAT_INITIAL_MATRIX){ 2673 /* get new tags to keep the communication clean */ 2674 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr); 2675 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr); 2676 ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr); 2677 2678 /* post receives of other's nzlocal */ 2679 for (i=0; i<nrecvs; i++){ 2680 ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr); 2681 } 2682 /* send nzlocal to others */ 2683 for (i=0; i<nsends; i++){ 2684 sbuf_nz[i] = nzlocal; 2685 ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr); 2686 } 2687 /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */ 2688 count = nrecvs; 2689 while (count) { 2690 ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr); 2691 recv_rank[imdex] = recv_status.MPI_SOURCE; 2692 /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */ 2693 ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr); 2694 2695 i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */ 2696 rbuf_nz[imdex] += i + 2; 2697 ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr); 2698 ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr); 2699 count--; 2700 } 2701 /* wait on sends of nzlocal */ 2702 if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);} 2703 /* send mat->i,j to others, and recv from other's */ 2704 /*------------------------------------------------*/ 2705 for (i=0; i<nsends; i++){ 2706 j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1; 2707 ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr); 2708 } 2709 /* wait on receives of mat->i,j */ 2710 /*------------------------------*/ 2711 count = nrecvs; 2712 while (count) { 2713 ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr); 2714 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2715 count--; 2716 } 2717 /* wait on sends of mat->i,j */ 2718 /*---------------------------*/ 2719 if (nsends) { 2720 ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr); 2721 } 2722 } /* endof if (reuse == MAT_INITIAL_MATRIX) */ 2723 2724 /* post receives, send and receive mat->a */ 2725 /*----------------------------------------*/ 2726 for (imdex=0; imdex<nrecvs; imdex++) { 2727 ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr); 2728 } 2729 for (i=0; i<nsends; i++){ 2730 ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr); 2731 } 2732 count = nrecvs; 2733 while (count) { 2734 ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr); 2735 if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE); 2736 count--; 2737 } 2738 if (nsends) { 2739 ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr); 2740 } 2741 2742 ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr); 2743 2744 /* create redundant matrix */ 2745 /*-------------------------*/ 2746 if (reuse == MAT_INITIAL_MATRIX){ 2747 /* compute rownz_max for preallocation */ 2748 for (imdex=0; imdex<nrecvs; imdex++){ 2749 j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]]; 2750 rptr = rbuf_j[imdex]; 2751 for (i=0; i<j; i++){ 2752 ncols = rptr[i+1] - rptr[i]; 2753 if (rownz_max < ncols) rownz_max = ncols; 2754 } 2755 } 2756 2757 ierr = MatCreate(subcomm,&C);CHKERRQ(ierr); 2758 ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2759 ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs); CHKERRQ(ierr); 2760 ierr = MatSetFromOptions(C);CHKERRQ(ierr); 2761 ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2762 ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr); 2763 } else { 2764 C = *matredundant; 2765 } 2766 2767 /* insert local matrix entries */ 2768 rptr = sbuf_j; 2769 cols = sbuf_j + rend-rstart + 1; 2770 vals = sbuf_a; 2771 for (i=0; i<rend-rstart; i++){ 2772 row = i + rstart; 2773 ncols = rptr[i+1] - rptr[i]; 2774 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2775 vals += ncols; 2776 cols += ncols; 2777 } 2778 /* insert received matrix entries */ 2779 for (imdex=0; imdex<nrecvs; imdex++){ 2780 rstart = rowrange[recv_rank[imdex]]; 2781 rend = rowrange[recv_rank[imdex]+1]; 2782 rptr = rbuf_j[imdex]; 2783 cols = rbuf_j[imdex] + rend-rstart + 1; 2784 vals = rbuf_a[imdex]; 2785 for (i=0; i<rend-rstart; i++){ 2786 row = i + rstart; 2787 ncols = rptr[i+1] - rptr[i]; 2788 ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2789 vals += ncols; 2790 cols += ncols; 2791 } 2792 } 2793 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2794 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2795 ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr); 2796 if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N); 2797 if (reuse == MAT_INITIAL_MATRIX) { 2798 PetscContainer container; 2799 *matredundant = C; 2800 /* create a supporting struct and attach it to C for reuse */ 2801 ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr); 2802 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 2803 ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr); 2804 ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr); 2805 ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr); 2806 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 2807 2808 redund->nzlocal = nzlocal; 2809 redund->nsends = nsends; 2810 redund->nrecvs = nrecvs; 2811 redund->send_rank = send_rank; 2812 redund->recv_rank = recv_rank; 2813 redund->sbuf_nz = sbuf_nz; 2814 redund->rbuf_nz = rbuf_nz; 2815 redund->sbuf_j = sbuf_j; 2816 redund->sbuf_a = sbuf_a; 2817 redund->rbuf_j = rbuf_j; 2818 redund->rbuf_a = rbuf_a; 2819 2820 redund->Destroy = C->ops->destroy; 2821 C->ops->destroy = MatDestroy_MatRedundant; 2822 } 2823 PetscFunctionReturn(0); 2824 } 2825 2826 #undef __FUNCT__ 2827 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2828 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2829 { 2830 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2831 PetscErrorCode ierr; 2832 PetscInt i,*idxb = 0; 2833 PetscScalar *va,*vb; 2834 Vec vtmp; 2835 2836 PetscFunctionBegin; 2837 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2838 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2839 if (idx) { 2840 for (i=0; i<A->rmap->n; i++) { 2841 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2842 } 2843 } 2844 2845 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2846 if (idx) { 2847 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2848 } 2849 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2850 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2851 2852 for (i=0; i<A->rmap->n; i++){ 2853 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2854 va[i] = vb[i]; 2855 if (idx) idx[i] = a->garray[idxb[i]]; 2856 } 2857 } 2858 2859 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2860 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2861 ierr = PetscFree(idxb);CHKERRQ(ierr); 2862 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2863 PetscFunctionReturn(0); 2864 } 2865 2866 #undef __FUNCT__ 2867 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2868 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2869 { 2870 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2871 PetscErrorCode ierr; 2872 PetscInt i,*idxb = 0; 2873 PetscScalar *va,*vb; 2874 Vec vtmp; 2875 2876 PetscFunctionBegin; 2877 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2878 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2879 if (idx) { 2880 for (i=0; i<A->cmap->n; i++) { 2881 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2882 } 2883 } 2884 2885 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2886 if (idx) { 2887 ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr); 2888 } 2889 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2890 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2891 2892 for (i=0; i<A->rmap->n; i++){ 2893 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2894 va[i] = vb[i]; 2895 if (idx) idx[i] = a->garray[idxb[i]]; 2896 } 2897 } 2898 2899 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2900 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2901 ierr = PetscFree(idxb);CHKERRQ(ierr); 2902 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2903 PetscFunctionReturn(0); 2904 } 2905 2906 #undef __FUNCT__ 2907 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2908 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2909 { 2910 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2911 PetscInt n = A->rmap->n; 2912 PetscInt cstart = A->cmap->rstart; 2913 PetscInt *cmap = mat->garray; 2914 PetscInt *diagIdx, *offdiagIdx; 2915 Vec diagV, offdiagV; 2916 PetscScalar *a, *diagA, *offdiagA; 2917 PetscInt r; 2918 PetscErrorCode ierr; 2919 2920 PetscFunctionBegin; 2921 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2922 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2923 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2924 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2925 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2926 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2927 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2928 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2929 for(r = 0; r < n; ++r) { 2930 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2931 a[r] = diagA[r]; 2932 idx[r] = cstart + diagIdx[r]; 2933 } else { 2934 a[r] = offdiagA[r]; 2935 idx[r] = cmap[offdiagIdx[r]]; 2936 } 2937 } 2938 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2939 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2940 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2941 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2942 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2943 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2944 PetscFunctionReturn(0); 2945 } 2946 2947 #undef __FUNCT__ 2948 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2949 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2950 { 2951 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 2952 PetscInt n = A->rmap->n; 2953 PetscInt cstart = A->cmap->rstart; 2954 PetscInt *cmap = mat->garray; 2955 PetscInt *diagIdx, *offdiagIdx; 2956 Vec diagV, offdiagV; 2957 PetscScalar *a, *diagA, *offdiagA; 2958 PetscInt r; 2959 PetscErrorCode ierr; 2960 2961 PetscFunctionBegin; 2962 ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr); 2963 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr); 2964 ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr); 2965 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2966 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2967 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2968 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2969 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2970 for(r = 0; r < n; ++r) { 2971 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2972 a[r] = diagA[r]; 2973 idx[r] = cstart + diagIdx[r]; 2974 } else { 2975 a[r] = offdiagA[r]; 2976 idx[r] = cmap[offdiagIdx[r]]; 2977 } 2978 } 2979 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2980 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2981 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2982 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2983 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2984 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2985 PetscFunctionReturn(0); 2986 } 2987 2988 #undef __FUNCT__ 2989 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 2990 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2991 { 2992 PetscErrorCode ierr; 2993 Mat *dummy; 2994 2995 PetscFunctionBegin; 2996 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2997 *newmat = *dummy; 2998 ierr = PetscFree(dummy);CHKERRQ(ierr); 2999 PetscFunctionReturn(0); 3000 } 3001 3002 extern PetscErrorCode MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*); 3003 3004 #undef __FUNCT__ 3005 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 3006 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 3007 { 3008 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 3009 PetscErrorCode ierr; 3010 3011 PetscFunctionBegin; 3012 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 3013 PetscFunctionReturn(0); 3014 } 3015 3016 3017 /* -------------------------------------------------------------------*/ 3018 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 3019 MatGetRow_MPIAIJ, 3020 MatRestoreRow_MPIAIJ, 3021 MatMult_MPIAIJ, 3022 /* 4*/ MatMultAdd_MPIAIJ, 3023 MatMultTranspose_MPIAIJ, 3024 MatMultTransposeAdd_MPIAIJ, 3025 #ifdef PETSC_HAVE_PBGL 3026 MatSolve_MPIAIJ, 3027 #else 3028 0, 3029 #endif 3030 0, 3031 0, 3032 /*10*/ 0, 3033 0, 3034 0, 3035 MatSOR_MPIAIJ, 3036 MatTranspose_MPIAIJ, 3037 /*15*/ MatGetInfo_MPIAIJ, 3038 MatEqual_MPIAIJ, 3039 MatGetDiagonal_MPIAIJ, 3040 MatDiagonalScale_MPIAIJ, 3041 MatNorm_MPIAIJ, 3042 /*20*/ MatAssemblyBegin_MPIAIJ, 3043 MatAssemblyEnd_MPIAIJ, 3044 MatSetOption_MPIAIJ, 3045 MatZeroEntries_MPIAIJ, 3046 /*24*/ MatZeroRows_MPIAIJ, 3047 0, 3048 #ifdef PETSC_HAVE_PBGL 3049 0, 3050 #else 3051 0, 3052 #endif 3053 0, 3054 0, 3055 /*29*/ MatSetUp_MPIAIJ, 3056 #ifdef PETSC_HAVE_PBGL 3057 0, 3058 #else 3059 0, 3060 #endif 3061 0, 3062 0, 3063 0, 3064 /*34*/ MatDuplicate_MPIAIJ, 3065 0, 3066 0, 3067 0, 3068 0, 3069 /*39*/ MatAXPY_MPIAIJ, 3070 MatGetSubMatrices_MPIAIJ, 3071 MatIncreaseOverlap_MPIAIJ, 3072 MatGetValues_MPIAIJ, 3073 MatCopy_MPIAIJ, 3074 /*44*/ MatGetRowMax_MPIAIJ, 3075 MatScale_MPIAIJ, 3076 0, 3077 0, 3078 MatZeroRowsColumns_MPIAIJ, 3079 /*49*/ 0, 3080 0, 3081 0, 3082 0, 3083 0, 3084 /*54*/ MatFDColoringCreate_MPIAIJ, 3085 0, 3086 MatSetUnfactored_MPIAIJ, 3087 0, /* MatPermute_MPIAIJ, impl currently broken */ 3088 0, 3089 /*59*/ MatGetSubMatrix_MPIAIJ, 3090 MatDestroy_MPIAIJ, 3091 MatView_MPIAIJ, 3092 0, 3093 0, 3094 /*64*/ 0, 3095 0, 3096 0, 3097 0, 3098 0, 3099 /*69*/ MatGetRowMaxAbs_MPIAIJ, 3100 MatGetRowMinAbs_MPIAIJ, 3101 0, 3102 MatSetColoring_MPIAIJ, 3103 #if defined(PETSC_HAVE_ADIC) 3104 MatSetValuesAdic_MPIAIJ, 3105 #else 3106 0, 3107 #endif 3108 MatSetValuesAdifor_MPIAIJ, 3109 /*75*/ MatFDColoringApply_AIJ, 3110 0, 3111 0, 3112 0, 3113 0, 3114 /*80*/ 0, 3115 0, 3116 0, 3117 /*83*/ MatLoad_MPIAIJ, 3118 0, 3119 0, 3120 0, 3121 0, 3122 0, 3123 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 3124 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 3125 MatMatMultNumeric_MPIAIJ_MPIAIJ, 3126 MatPtAP_Basic, 3127 MatPtAPSymbolic_MPIAIJ, 3128 /*94*/ MatPtAPNumeric_MPIAIJ, 3129 0, 3130 0, 3131 0, 3132 0, 3133 /*99*/ 0, 3134 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 3135 MatPtAPNumeric_MPIAIJ_MPIAIJ, 3136 MatConjugate_MPIAIJ, 3137 0, 3138 /*104*/MatSetValuesRow_MPIAIJ, 3139 MatRealPart_MPIAIJ, 3140 MatImaginaryPart_MPIAIJ, 3141 0, 3142 0, 3143 /*109*/0, 3144 MatGetRedundantMatrix_MPIAIJ, 3145 MatGetRowMin_MPIAIJ, 3146 0, 3147 0, 3148 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 3149 0, 3150 0, 3151 0, 3152 0, 3153 /*119*/0, 3154 0, 3155 0, 3156 0, 3157 MatGetMultiProcBlock_MPIAIJ, 3158 /*124*/MatFindNonZeroRows_MPIAIJ, 3159 MatGetColumnNorms_MPIAIJ, 3160 MatInvertBlockDiagonal_MPIAIJ, 3161 0, 3162 MatGetSubMatricesParallel_MPIAIJ, 3163 /*129*/0, 3164 MatTransposeMatMult_MPIAIJ_MPIAIJ, 3165 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 3166 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 3167 0, 3168 /*134*/0, 3169 0, 3170 0, 3171 0, 3172 0 3173 }; 3174 3175 /* ----------------------------------------------------------------------------------------*/ 3176 3177 EXTERN_C_BEGIN 3178 #undef __FUNCT__ 3179 #define __FUNCT__ "MatStoreValues_MPIAIJ" 3180 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 3181 { 3182 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3183 PetscErrorCode ierr; 3184 3185 PetscFunctionBegin; 3186 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 3187 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 3188 PetscFunctionReturn(0); 3189 } 3190 EXTERN_C_END 3191 3192 EXTERN_C_BEGIN 3193 #undef __FUNCT__ 3194 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 3195 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 3196 { 3197 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 3198 PetscErrorCode ierr; 3199 3200 PetscFunctionBegin; 3201 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 3202 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 3203 PetscFunctionReturn(0); 3204 } 3205 EXTERN_C_END 3206 3207 EXTERN_C_BEGIN 3208 #undef __FUNCT__ 3209 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 3210 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3211 { 3212 Mat_MPIAIJ *b; 3213 PetscErrorCode ierr; 3214 PetscInt i; 3215 PetscBool d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE; 3216 3217 PetscFunctionBegin; 3218 if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE; 3219 if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE; 3220 if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5; 3221 if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2; 3222 if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz); 3223 if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz); 3224 3225 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3226 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3227 if (d_nnz) { 3228 for (i=0; i<B->rmap->n; i++) { 3229 if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]); 3230 } 3231 } 3232 if (o_nnz) { 3233 for (i=0; i<B->rmap->n; i++) { 3234 if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]); 3235 } 3236 } 3237 b = (Mat_MPIAIJ*)B->data; 3238 3239 if (!B->preallocated) { 3240 /* Explicitly create 2 MATSEQAIJ matrices. */ 3241 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 3242 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 3243 ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3244 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 3245 ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr); 3246 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 3247 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 3248 ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 3249 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 3250 ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr); 3251 } 3252 3253 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 3254 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 3255 /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */ 3256 if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3257 if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);} 3258 B->preallocated = PETSC_TRUE; 3259 PetscFunctionReturn(0); 3260 } 3261 EXTERN_C_END 3262 3263 #undef __FUNCT__ 3264 #define __FUNCT__ "MatDuplicate_MPIAIJ" 3265 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 3266 { 3267 Mat mat; 3268 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 3269 PetscErrorCode ierr; 3270 3271 PetscFunctionBegin; 3272 *newmat = 0; 3273 ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr); 3274 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 3275 ierr = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr); 3276 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 3277 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3278 a = (Mat_MPIAIJ*)mat->data; 3279 3280 mat->factortype = matin->factortype; 3281 mat->rmap->bs = matin->rmap->bs; 3282 mat->cmap->bs = matin->cmap->bs; 3283 mat->assembled = PETSC_TRUE; 3284 mat->insertmode = NOT_SET_VALUES; 3285 mat->preallocated = PETSC_TRUE; 3286 3287 a->size = oldmat->size; 3288 a->rank = oldmat->rank; 3289 a->donotstash = oldmat->donotstash; 3290 a->roworiented = oldmat->roworiented; 3291 a->rowindices = 0; 3292 a->rowvalues = 0; 3293 a->getrowactive = PETSC_FALSE; 3294 3295 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 3296 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3297 3298 if (oldmat->colmap) { 3299 #if defined (PETSC_USE_CTABLE) 3300 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 3301 #else 3302 ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr); 3303 ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3304 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 3305 #endif 3306 } else a->colmap = 0; 3307 if (oldmat->garray) { 3308 PetscInt len; 3309 len = oldmat->B->cmap->n; 3310 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr); 3311 ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3312 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 3313 } else a->garray = 0; 3314 3315 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 3316 ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr); 3317 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 3318 ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr); 3319 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 3320 ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr); 3321 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 3322 ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr); 3323 ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 3324 *newmat = mat; 3325 PetscFunctionReturn(0); 3326 } 3327 3328 3329 3330 #undef __FUNCT__ 3331 #define __FUNCT__ "MatLoad_MPIAIJ" 3332 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3333 { 3334 PetscScalar *vals,*svals; 3335 MPI_Comm comm = ((PetscObject)viewer)->comm; 3336 PetscErrorCode ierr; 3337 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 3338 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols; 3339 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 3340 PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols; 3341 PetscInt cend,cstart,n,*rowners,sizesset=1; 3342 int fd; 3343 PetscInt bs = 1; 3344 3345 PetscFunctionBegin; 3346 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3347 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3348 if (!rank) { 3349 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3350 ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 3351 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 3352 } 3353 3354 ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr); 3355 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr); 3356 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3357 3358 if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0; 3359 3360 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 3361 M = header[1]; N = header[2]; 3362 /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 3363 if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M; 3364 if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N; 3365 3366 /* If global sizes are set, check if they are consistent with that given in the file */ 3367 if (sizesset) { 3368 ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr); 3369 } 3370 if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3371 if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 3372 3373 /* determine ownership of all (block) rows */ 3374 if( M%bs ) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3375 if (newMat->rmap->n < 0 ) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3376 else m = newMat->rmap->n; /* Set by user */ 3377 3378 ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr); 3379 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3380 3381 /* First process needs enough room for process with most rows */ 3382 if (!rank) { 3383 mmax = rowners[1]; 3384 for (i=2; i<size; i++) { 3385 mmax = PetscMax(mmax,rowners[i]); 3386 } 3387 } else mmax = m; 3388 3389 rowners[0] = 0; 3390 for (i=2; i<=size; i++) { 3391 rowners[i] += rowners[i-1]; 3392 } 3393 rstart = rowners[rank]; 3394 rend = rowners[rank+1]; 3395 3396 /* distribute row lengths to all processors */ 3397 ierr = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr); 3398 if (!rank) { 3399 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 3400 ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 3401 ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr); 3402 ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr); 3403 for (j=0; j<m; j++) { 3404 procsnz[0] += ourlens[j]; 3405 } 3406 for (i=1; i<size; i++) { 3407 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 3408 /* calculate the number of nonzeros on each processor */ 3409 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3410 procsnz[i] += rowlengths[j]; 3411 } 3412 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3413 } 3414 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3415 } else { 3416 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3417 } 3418 3419 if (!rank) { 3420 /* determine max buffer needed and allocate it */ 3421 maxnz = 0; 3422 for (i=0; i<size; i++) { 3423 maxnz = PetscMax(maxnz,procsnz[i]); 3424 } 3425 ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr); 3426 3427 /* read in my part of the matrix column indices */ 3428 nz = procsnz[0]; 3429 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3430 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 3431 3432 /* read in every one elses and ship off */ 3433 for (i=1; i<size; i++) { 3434 nz = procsnz[i]; 3435 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 3436 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3437 } 3438 ierr = PetscFree(cols);CHKERRQ(ierr); 3439 } else { 3440 /* determine buffer space needed for message */ 3441 nz = 0; 3442 for (i=0; i<m; i++) { 3443 nz += ourlens[i]; 3444 } 3445 ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr); 3446 3447 /* receive message of column indices*/ 3448 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3449 } 3450 3451 /* determine column ownership if matrix is not square */ 3452 if (N != M) { 3453 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3454 else n = newMat->cmap->n; 3455 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3456 cstart = cend - n; 3457 } else { 3458 cstart = rstart; 3459 cend = rend; 3460 n = cend - cstart; 3461 } 3462 3463 /* loop over local rows, determining number of off diagonal entries */ 3464 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3465 jj = 0; 3466 for (i=0; i<m; i++) { 3467 for (j=0; j<ourlens[i]; j++) { 3468 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3469 jj++; 3470 } 3471 } 3472 3473 for (i=0; i<m; i++) { 3474 ourlens[i] -= offlens[i]; 3475 } 3476 if (!sizesset) { 3477 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3478 } 3479 3480 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3481 3482 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3483 3484 for (i=0; i<m; i++) { 3485 ourlens[i] += offlens[i]; 3486 } 3487 3488 if (!rank) { 3489 ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3490 3491 /* read in my part of the matrix numerical values */ 3492 nz = procsnz[0]; 3493 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3494 3495 /* insert into matrix */ 3496 jj = rstart; 3497 smycols = mycols; 3498 svals = vals; 3499 for (i=0; i<m; i++) { 3500 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3501 smycols += ourlens[i]; 3502 svals += ourlens[i]; 3503 jj++; 3504 } 3505 3506 /* read in other processors and ship out */ 3507 for (i=1; i<size; i++) { 3508 nz = procsnz[i]; 3509 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3510 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3511 } 3512 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3513 } else { 3514 /* receive numeric values */ 3515 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr); 3516 3517 /* receive message of values*/ 3518 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3519 3520 /* insert into matrix */ 3521 jj = rstart; 3522 smycols = mycols; 3523 svals = vals; 3524 for (i=0; i<m; i++) { 3525 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3526 smycols += ourlens[i]; 3527 svals += ourlens[i]; 3528 jj++; 3529 } 3530 } 3531 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3532 ierr = PetscFree(vals);CHKERRQ(ierr); 3533 ierr = PetscFree(mycols);CHKERRQ(ierr); 3534 ierr = PetscFree(rowners);CHKERRQ(ierr); 3535 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3536 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3537 3538 PetscFunctionReturn(0); 3539 } 3540 3541 #undef __FUNCT__ 3542 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3543 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3544 { 3545 PetscErrorCode ierr; 3546 IS iscol_local; 3547 PetscInt csize; 3548 3549 PetscFunctionBegin; 3550 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3551 if (call == MAT_REUSE_MATRIX) { 3552 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3553 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3554 } else { 3555 PetscInt cbs; 3556 ierr = ISGetBlockSize(iscol,&cbs); CHKERRQ(ierr); 3557 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3558 ierr = ISSetBlockSize(iscol_local,cbs); CHKERRQ(ierr); 3559 } 3560 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3561 if (call == MAT_INITIAL_MATRIX) { 3562 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3563 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3564 } 3565 PetscFunctionReturn(0); 3566 } 3567 3568 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3569 #undef __FUNCT__ 3570 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3571 /* 3572 Not great since it makes two copies of the submatrix, first an SeqAIJ 3573 in local and then by concatenating the local matrices the end result. 3574 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3575 3576 Note: This requires a sequential iscol with all indices. 3577 */ 3578 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3579 { 3580 PetscErrorCode ierr; 3581 PetscMPIInt rank,size; 3582 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3583 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3584 PetscBool allcolumns, colflag; 3585 Mat M,Mreuse; 3586 MatScalar *vwork,*aa; 3587 MPI_Comm comm = ((PetscObject)mat)->comm; 3588 Mat_SeqAIJ *aij; 3589 3590 3591 PetscFunctionBegin; 3592 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3593 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3594 3595 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3596 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3597 if (colflag && ncol == mat->cmap->N){ 3598 allcolumns = PETSC_TRUE; 3599 } else { 3600 allcolumns = PETSC_FALSE; 3601 } 3602 if (call == MAT_REUSE_MATRIX) { 3603 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr); 3604 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3605 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3606 } else { 3607 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3608 } 3609 3610 /* 3611 m - number of local rows 3612 n - number of columns (same on all processors) 3613 rstart - first row in new global matrix generated 3614 */ 3615 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3616 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3617 if (call == MAT_INITIAL_MATRIX) { 3618 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3619 ii = aij->i; 3620 jj = aij->j; 3621 3622 /* 3623 Determine the number of non-zeros in the diagonal and off-diagonal 3624 portions of the matrix in order to do correct preallocation 3625 */ 3626 3627 /* first get start and end of "diagonal" columns */ 3628 if (csize == PETSC_DECIDE) { 3629 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3630 if (mglobal == n) { /* square matrix */ 3631 nlocal = m; 3632 } else { 3633 nlocal = n/size + ((n % size) > rank); 3634 } 3635 } else { 3636 nlocal = csize; 3637 } 3638 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3639 rstart = rend - nlocal; 3640 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3641 3642 /* next, compute all the lengths */ 3643 ierr = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr); 3644 olens = dlens + m; 3645 for (i=0; i<m; i++) { 3646 jend = ii[i+1] - ii[i]; 3647 olen = 0; 3648 dlen = 0; 3649 for (j=0; j<jend; j++) { 3650 if (*jj < rstart || *jj >= rend) olen++; 3651 else dlen++; 3652 jj++; 3653 } 3654 olens[i] = olen; 3655 dlens[i] = dlen; 3656 } 3657 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3658 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3659 ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); 3660 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3661 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3662 ierr = PetscFree(dlens);CHKERRQ(ierr); 3663 } else { 3664 PetscInt ml,nl; 3665 3666 M = *newmat; 3667 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3668 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3669 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3670 /* 3671 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3672 rather than the slower MatSetValues(). 3673 */ 3674 M->was_assembled = PETSC_TRUE; 3675 M->assembled = PETSC_FALSE; 3676 } 3677 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3678 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3679 ii = aij->i; 3680 jj = aij->j; 3681 aa = aij->a; 3682 for (i=0; i<m; i++) { 3683 row = rstart + i; 3684 nz = ii[i+1] - ii[i]; 3685 cwork = jj; jj += nz; 3686 vwork = aa; aa += nz; 3687 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3688 } 3689 3690 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3691 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3692 *newmat = M; 3693 3694 /* save submatrix used in processor for next request */ 3695 if (call == MAT_INITIAL_MATRIX) { 3696 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3697 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3698 } 3699 3700 PetscFunctionReturn(0); 3701 } 3702 3703 EXTERN_C_BEGIN 3704 #undef __FUNCT__ 3705 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3706 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3707 { 3708 PetscInt m,cstart, cend,j,nnz,i,d; 3709 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3710 const PetscInt *JJ; 3711 PetscScalar *values; 3712 PetscErrorCode ierr; 3713 3714 PetscFunctionBegin; 3715 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3716 3717 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3718 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3719 m = B->rmap->n; 3720 cstart = B->cmap->rstart; 3721 cend = B->cmap->rend; 3722 rstart = B->rmap->rstart; 3723 3724 ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr); 3725 3726 #if defined(PETSC_USE_DEBUGGING) 3727 for (i=0; i<m; i++) { 3728 nnz = Ii[i+1]- Ii[i]; 3729 JJ = J + Ii[i]; 3730 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3731 if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j); 3732 if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3733 } 3734 #endif 3735 3736 for (i=0; i<m; i++) { 3737 nnz = Ii[i+1]- Ii[i]; 3738 JJ = J + Ii[i]; 3739 nnz_max = PetscMax(nnz_max,nnz); 3740 d = 0; 3741 for (j=0; j<nnz; j++) { 3742 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3743 } 3744 d_nnz[i] = d; 3745 o_nnz[i] = nnz - d; 3746 } 3747 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3748 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3749 3750 if (v) values = (PetscScalar*)v; 3751 else { 3752 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr); 3753 ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr); 3754 } 3755 3756 for (i=0; i<m; i++) { 3757 ii = i + rstart; 3758 nnz = Ii[i+1]- Ii[i]; 3759 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3760 } 3761 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3762 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3763 3764 if (!v) { 3765 ierr = PetscFree(values);CHKERRQ(ierr); 3766 } 3767 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3768 PetscFunctionReturn(0); 3769 } 3770 EXTERN_C_END 3771 3772 #undef __FUNCT__ 3773 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3774 /*@ 3775 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3776 (the default parallel PETSc format). 3777 3778 Collective on MPI_Comm 3779 3780 Input Parameters: 3781 + B - the matrix 3782 . i - the indices into j for the start of each local row (starts with zero) 3783 . j - the column indices for each local row (starts with zero) 3784 - v - optional values in the matrix 3785 3786 Level: developer 3787 3788 Notes: 3789 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3790 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3791 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3792 3793 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3794 3795 The format which is used for the sparse matrix input, is equivalent to a 3796 row-major ordering.. i.e for the following matrix, the input data expected is 3797 as shown: 3798 3799 1 0 0 3800 2 0 3 P0 3801 ------- 3802 4 5 6 P1 3803 3804 Process0 [P0]: rows_owned=[0,1] 3805 i = {0,1,3} [size = nrow+1 = 2+1] 3806 j = {0,0,2} [size = nz = 6] 3807 v = {1,2,3} [size = nz = 6] 3808 3809 Process1 [P1]: rows_owned=[2] 3810 i = {0,3} [size = nrow+1 = 1+1] 3811 j = {0,1,2} [size = nz = 6] 3812 v = {4,5,6} [size = nz = 6] 3813 3814 .keywords: matrix, aij, compressed row, sparse, parallel 3815 3816 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, 3817 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3818 @*/ 3819 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3820 { 3821 PetscErrorCode ierr; 3822 3823 PetscFunctionBegin; 3824 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3825 PetscFunctionReturn(0); 3826 } 3827 3828 #undef __FUNCT__ 3829 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3830 /*@C 3831 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3832 (the default parallel PETSc format). For good matrix assembly performance 3833 the user should preallocate the matrix storage by setting the parameters 3834 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3835 performance can be increased by more than a factor of 50. 3836 3837 Collective on MPI_Comm 3838 3839 Input Parameters: 3840 + A - the matrix 3841 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3842 (same value is used for all local rows) 3843 . d_nnz - array containing the number of nonzeros in the various rows of the 3844 DIAGONAL portion of the local submatrix (possibly different for each row) 3845 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 3846 The size of this array is equal to the number of local rows, i.e 'm'. 3847 For matrices that will be factored, you must leave room for (and set) 3848 the diagonal entry even if it is zero. 3849 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3850 submatrix (same value is used for all local rows). 3851 - o_nnz - array containing the number of nonzeros in the various rows of the 3852 OFF-DIAGONAL portion of the local submatrix (possibly different for 3853 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 3854 structure. The size of this array is equal to the number 3855 of local rows, i.e 'm'. 3856 3857 If the *_nnz parameter is given then the *_nz parameter is ignored 3858 3859 The AIJ format (also called the Yale sparse matrix format or 3860 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3861 storage. The stored row and column indices begin with zero. 3862 See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details. 3863 3864 The parallel matrix is partitioned such that the first m0 rows belong to 3865 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3866 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3867 3868 The DIAGONAL portion of the local submatrix of a processor can be defined 3869 as the submatrix which is obtained by extraction the part corresponding to 3870 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3871 first row that belongs to the processor, r2 is the last row belonging to 3872 the this processor, and c1-c2 is range of indices of the local part of a 3873 vector suitable for applying the matrix to. This is an mxn matrix. In the 3874 common case of a square matrix, the row and column ranges are the same and 3875 the DIAGONAL part is also square. The remaining portion of the local 3876 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3877 3878 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3879 3880 You can call MatGetInfo() to get information on how effective the preallocation was; 3881 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3882 You can also run with the option -info and look for messages with the string 3883 malloc in them to see if additional memory allocation was needed. 3884 3885 Example usage: 3886 3887 Consider the following 8x8 matrix with 34 non-zero values, that is 3888 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3889 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3890 as follows: 3891 3892 .vb 3893 1 2 0 | 0 3 0 | 0 4 3894 Proc0 0 5 6 | 7 0 0 | 8 0 3895 9 0 10 | 11 0 0 | 12 0 3896 ------------------------------------- 3897 13 0 14 | 15 16 17 | 0 0 3898 Proc1 0 18 0 | 19 20 21 | 0 0 3899 0 0 0 | 22 23 0 | 24 0 3900 ------------------------------------- 3901 Proc2 25 26 27 | 0 0 28 | 29 0 3902 30 0 0 | 31 32 33 | 0 34 3903 .ve 3904 3905 This can be represented as a collection of submatrices as: 3906 3907 .vb 3908 A B C 3909 D E F 3910 G H I 3911 .ve 3912 3913 Where the submatrices A,B,C are owned by proc0, D,E,F are 3914 owned by proc1, G,H,I are owned by proc2. 3915 3916 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3917 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3918 The 'M','N' parameters are 8,8, and have the same values on all procs. 3919 3920 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3921 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3922 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3923 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3924 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3925 matrix, ans [DF] as another SeqAIJ matrix. 3926 3927 When d_nz, o_nz parameters are specified, d_nz storage elements are 3928 allocated for every row of the local diagonal submatrix, and o_nz 3929 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3930 One way to choose d_nz and o_nz is to use the max nonzerors per local 3931 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3932 In this case, the values of d_nz,o_nz are: 3933 .vb 3934 proc0 : dnz = 2, o_nz = 2 3935 proc1 : dnz = 3, o_nz = 2 3936 proc2 : dnz = 1, o_nz = 4 3937 .ve 3938 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3939 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3940 for proc3. i.e we are using 12+15+10=37 storage locations to store 3941 34 values. 3942 3943 When d_nnz, o_nnz parameters are specified, the storage is specified 3944 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3945 In the above case the values for d_nnz,o_nnz are: 3946 .vb 3947 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3948 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3949 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3950 .ve 3951 Here the space allocated is sum of all the above values i.e 34, and 3952 hence pre-allocation is perfect. 3953 3954 Level: intermediate 3955 3956 .keywords: matrix, aij, compressed row, sparse, parallel 3957 3958 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 3959 MPIAIJ, MatGetInfo() 3960 @*/ 3961 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3962 { 3963 PetscErrorCode ierr; 3964 3965 PetscFunctionBegin; 3966 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3967 PetscValidType(B,1); 3968 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3969 PetscFunctionReturn(0); 3970 } 3971 3972 #undef __FUNCT__ 3973 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3974 /*@ 3975 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3976 CSR format the local rows. 3977 3978 Collective on MPI_Comm 3979 3980 Input Parameters: 3981 + comm - MPI communicator 3982 . m - number of local rows (Cannot be PETSC_DECIDE) 3983 . n - This value should be the same as the local size used in creating the 3984 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3985 calculated if N is given) For square matrices n is almost always m. 3986 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3987 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3988 . i - row indices 3989 . j - column indices 3990 - a - matrix values 3991 3992 Output Parameter: 3993 . mat - the matrix 3994 3995 Level: intermediate 3996 3997 Notes: 3998 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3999 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4000 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4001 4002 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4003 4004 The format which is used for the sparse matrix input, is equivalent to a 4005 row-major ordering.. i.e for the following matrix, the input data expected is 4006 as shown: 4007 4008 1 0 0 4009 2 0 3 P0 4010 ------- 4011 4 5 6 P1 4012 4013 Process0 [P0]: rows_owned=[0,1] 4014 i = {0,1,3} [size = nrow+1 = 2+1] 4015 j = {0,0,2} [size = nz = 6] 4016 v = {1,2,3} [size = nz = 6] 4017 4018 Process1 [P1]: rows_owned=[2] 4019 i = {0,3} [size = nrow+1 = 1+1] 4020 j = {0,1,2} [size = nz = 6] 4021 v = {4,5,6} [size = nz = 6] 4022 4023 .keywords: matrix, aij, compressed row, sparse, parallel 4024 4025 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4026 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4027 @*/ 4028 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4029 { 4030 PetscErrorCode ierr; 4031 4032 PetscFunctionBegin; 4033 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4034 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4035 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4036 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4037 /* ierr = MatSetBlockSizes(M,bs,cbs); CHKERRQ(ierr); */ 4038 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4039 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4040 PetscFunctionReturn(0); 4041 } 4042 4043 #undef __FUNCT__ 4044 #define __FUNCT__ "MatCreateAIJ" 4045 /*@C 4046 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4047 (the default parallel PETSc format). For good matrix assembly performance 4048 the user should preallocate the matrix storage by setting the parameters 4049 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4050 performance can be increased by more than a factor of 50. 4051 4052 Collective on MPI_Comm 4053 4054 Input Parameters: 4055 + comm - MPI communicator 4056 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4057 This value should be the same as the local size used in creating the 4058 y vector for the matrix-vector product y = Ax. 4059 . n - This value should be the same as the local size used in creating the 4060 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4061 calculated if N is given) For square matrices n is almost always m. 4062 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4063 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4064 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4065 (same value is used for all local rows) 4066 . d_nnz - array containing the number of nonzeros in the various rows of the 4067 DIAGONAL portion of the local submatrix (possibly different for each row) 4068 or PETSC_NULL, if d_nz is used to specify the nonzero structure. 4069 The size of this array is equal to the number of local rows, i.e 'm'. 4070 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4071 submatrix (same value is used for all local rows). 4072 - o_nnz - array containing the number of nonzeros in the various rows of the 4073 OFF-DIAGONAL portion of the local submatrix (possibly different for 4074 each row) or PETSC_NULL, if o_nz is used to specify the nonzero 4075 structure. The size of this array is equal to the number 4076 of local rows, i.e 'm'. 4077 4078 Output Parameter: 4079 . A - the matrix 4080 4081 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4082 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4083 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4084 4085 Notes: 4086 If the *_nnz parameter is given then the *_nz parameter is ignored 4087 4088 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4089 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4090 storage requirements for this matrix. 4091 4092 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4093 processor than it must be used on all processors that share the object for 4094 that argument. 4095 4096 The user MUST specify either the local or global matrix dimensions 4097 (possibly both). 4098 4099 The parallel matrix is partitioned across processors such that the 4100 first m0 rows belong to process 0, the next m1 rows belong to 4101 process 1, the next m2 rows belong to process 2 etc.. where 4102 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4103 values corresponding to [m x N] submatrix. 4104 4105 The columns are logically partitioned with the n0 columns belonging 4106 to 0th partition, the next n1 columns belonging to the next 4107 partition etc.. where n0,n1,n2... are the the input parameter 'n'. 4108 4109 The DIAGONAL portion of the local submatrix on any given processor 4110 is the submatrix corresponding to the rows and columns m,n 4111 corresponding to the given processor. i.e diagonal matrix on 4112 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4113 etc. The remaining portion of the local submatrix [m x (N-n)] 4114 constitute the OFF-DIAGONAL portion. The example below better 4115 illustrates this concept. 4116 4117 For a square global matrix we define each processor's diagonal portion 4118 to be its local rows and the corresponding columns (a square submatrix); 4119 each processor's off-diagonal portion encompasses the remainder of the 4120 local matrix (a rectangular submatrix). 4121 4122 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4123 4124 When calling this routine with a single process communicator, a matrix of 4125 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4126 type of communicator, use the construction mechanism: 4127 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4128 4129 By default, this format uses inodes (identical nodes) when possible. 4130 We search for consecutive rows with the same nonzero structure, thereby 4131 reusing matrix information to achieve increased efficiency. 4132 4133 Options Database Keys: 4134 + -mat_no_inode - Do not use inodes 4135 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4136 - -mat_aij_oneindex - Internally use indexing starting at 1 4137 rather than 0. Note that when calling MatSetValues(), 4138 the user still MUST index entries starting at 0! 4139 4140 4141 Example usage: 4142 4143 Consider the following 8x8 matrix with 34 non-zero values, that is 4144 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4145 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4146 as follows: 4147 4148 .vb 4149 1 2 0 | 0 3 0 | 0 4 4150 Proc0 0 5 6 | 7 0 0 | 8 0 4151 9 0 10 | 11 0 0 | 12 0 4152 ------------------------------------- 4153 13 0 14 | 15 16 17 | 0 0 4154 Proc1 0 18 0 | 19 20 21 | 0 0 4155 0 0 0 | 22 23 0 | 24 0 4156 ------------------------------------- 4157 Proc2 25 26 27 | 0 0 28 | 29 0 4158 30 0 0 | 31 32 33 | 0 34 4159 .ve 4160 4161 This can be represented as a collection of submatrices as: 4162 4163 .vb 4164 A B C 4165 D E F 4166 G H I 4167 .ve 4168 4169 Where the submatrices A,B,C are owned by proc0, D,E,F are 4170 owned by proc1, G,H,I are owned by proc2. 4171 4172 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4173 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4174 The 'M','N' parameters are 8,8, and have the same values on all procs. 4175 4176 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4177 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4178 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4179 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4180 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4181 matrix, ans [DF] as another SeqAIJ matrix. 4182 4183 When d_nz, o_nz parameters are specified, d_nz storage elements are 4184 allocated for every row of the local diagonal submatrix, and o_nz 4185 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4186 One way to choose d_nz and o_nz is to use the max nonzerors per local 4187 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4188 In this case, the values of d_nz,o_nz are: 4189 .vb 4190 proc0 : dnz = 2, o_nz = 2 4191 proc1 : dnz = 3, o_nz = 2 4192 proc2 : dnz = 1, o_nz = 4 4193 .ve 4194 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4195 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4196 for proc3. i.e we are using 12+15+10=37 storage locations to store 4197 34 values. 4198 4199 When d_nnz, o_nnz parameters are specified, the storage is specified 4200 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4201 In the above case the values for d_nnz,o_nnz are: 4202 .vb 4203 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4204 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4205 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4206 .ve 4207 Here the space allocated is sum of all the above values i.e 34, and 4208 hence pre-allocation is perfect. 4209 4210 Level: intermediate 4211 4212 .keywords: matrix, aij, compressed row, sparse, parallel 4213 4214 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4215 MPIAIJ, MatCreateMPIAIJWithArrays() 4216 @*/ 4217 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4218 { 4219 PetscErrorCode ierr; 4220 PetscMPIInt size; 4221 4222 PetscFunctionBegin; 4223 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4224 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4225 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4226 if (size > 1) { 4227 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4228 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4229 } else { 4230 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4231 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4232 } 4233 PetscFunctionReturn(0); 4234 } 4235 4236 #undef __FUNCT__ 4237 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 4238 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[]) 4239 { 4240 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4241 4242 PetscFunctionBegin; 4243 *Ad = a->A; 4244 *Ao = a->B; 4245 *colmap = a->garray; 4246 PetscFunctionReturn(0); 4247 } 4248 4249 #undef __FUNCT__ 4250 #define __FUNCT__ "MatSetColoring_MPIAIJ" 4251 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring) 4252 { 4253 PetscErrorCode ierr; 4254 PetscInt i; 4255 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4256 4257 PetscFunctionBegin; 4258 if (coloring->ctype == IS_COLORING_GLOBAL) { 4259 ISColoringValue *allcolors,*colors; 4260 ISColoring ocoloring; 4261 4262 /* set coloring for diagonal portion */ 4263 ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr); 4264 4265 /* set coloring for off-diagonal portion */ 4266 ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr); 4267 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4268 for (i=0; i<a->B->cmap->n; i++) { 4269 colors[i] = allcolors[a->garray[i]]; 4270 } 4271 ierr = PetscFree(allcolors);CHKERRQ(ierr); 4272 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4273 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4274 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4275 } else if (coloring->ctype == IS_COLORING_GHOSTED) { 4276 ISColoringValue *colors; 4277 PetscInt *larray; 4278 ISColoring ocoloring; 4279 4280 /* set coloring for diagonal portion */ 4281 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4282 for (i=0; i<a->A->cmap->n; i++) { 4283 larray[i] = i + A->cmap->rstart; 4284 } 4285 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr); 4286 ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4287 for (i=0; i<a->A->cmap->n; i++) { 4288 colors[i] = coloring->colors[larray[i]]; 4289 } 4290 ierr = PetscFree(larray);CHKERRQ(ierr); 4291 ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4292 ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr); 4293 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4294 4295 /* set coloring for off-diagonal portion */ 4296 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr); 4297 ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr); 4298 ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr); 4299 for (i=0; i<a->B->cmap->n; i++) { 4300 colors[i] = coloring->colors[larray[i]]; 4301 } 4302 ierr = PetscFree(larray);CHKERRQ(ierr); 4303 ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr); 4304 ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr); 4305 ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr); 4306 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype); 4307 4308 PetscFunctionReturn(0); 4309 } 4310 4311 #if defined(PETSC_HAVE_ADIC) 4312 #undef __FUNCT__ 4313 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ" 4314 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues) 4315 { 4316 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4317 PetscErrorCode ierr; 4318 4319 PetscFunctionBegin; 4320 ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr); 4321 ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr); 4322 PetscFunctionReturn(0); 4323 } 4324 #endif 4325 4326 #undef __FUNCT__ 4327 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ" 4328 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues) 4329 { 4330 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4331 PetscErrorCode ierr; 4332 4333 PetscFunctionBegin; 4334 ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr); 4335 ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr); 4336 PetscFunctionReturn(0); 4337 } 4338 4339 #undef __FUNCT__ 4340 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic" 4341 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat) 4342 { 4343 PetscErrorCode ierr; 4344 PetscInt m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs; 4345 PetscInt *indx; 4346 4347 PetscFunctionBegin; 4348 /* This routine will ONLY return MPIAIJ type matrix */ 4349 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4350 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4351 if (n == PETSC_DECIDE){ 4352 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4353 } 4354 /* Check sum(n) = N */ 4355 ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4356 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 4357 4358 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4359 rstart -= m; 4360 4361 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4362 for (i=0;i<m;i++) { 4363 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4364 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4365 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr); 4366 } 4367 4368 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4369 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE); CHKERRQ(ierr); 4370 ierr = MatSetBlockSizes(*outmat,bs,cbs); CHKERRQ(ierr); 4371 ierr = MatSetType(*outmat,MATMPIAIJ); CHKERRQ(ierr); 4372 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4373 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4374 PetscFunctionReturn(0); 4375 } 4376 4377 #undef __FUNCT__ 4378 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric" 4379 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat) 4380 { 4381 PetscErrorCode ierr; 4382 PetscInt m,N,i,rstart,nnz,Ii; 4383 PetscInt *indx; 4384 PetscScalar *values; 4385 4386 PetscFunctionBegin; 4387 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4388 ierr = MatGetOwnershipRange(outmat,&rstart,PETSC_NULL);CHKERRQ(ierr); 4389 for (i=0;i<m;i++) { 4390 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4391 Ii = i + rstart; 4392 ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4393 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4394 } 4395 ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4396 ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4397 PetscFunctionReturn(0); 4398 } 4399 4400 #undef __FUNCT__ 4401 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ" 4402 /*@ 4403 MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential 4404 matrices from each processor 4405 4406 Collective on MPI_Comm 4407 4408 Input Parameters: 4409 + comm - the communicators the parallel matrix will live on 4410 . inmat - the input sequential matrices 4411 . n - number of local columns (or PETSC_DECIDE) 4412 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4413 4414 Output Parameter: 4415 . outmat - the parallel matrix generated 4416 4417 Level: advanced 4418 4419 Notes: The number of columns of the matrix in EACH processor MUST be the same. 4420 4421 @*/ 4422 PetscErrorCode MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4423 { 4424 PetscErrorCode ierr; 4425 4426 PetscFunctionBegin; 4427 ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4428 if (scall == MAT_INITIAL_MATRIX){ 4429 ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr); 4430 } 4431 ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr); 4432 ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr); 4433 PetscFunctionReturn(0); 4434 } 4435 4436 #undef __FUNCT__ 4437 #define __FUNCT__ "MatFileSplit" 4438 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4439 { 4440 PetscErrorCode ierr; 4441 PetscMPIInt rank; 4442 PetscInt m,N,i,rstart,nnz; 4443 size_t len; 4444 const PetscInt *indx; 4445 PetscViewer out; 4446 char *name; 4447 Mat B; 4448 const PetscScalar *values; 4449 4450 PetscFunctionBegin; 4451 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4452 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4453 /* Should this be the type of the diagonal block of A? */ 4454 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4455 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4456 ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 4457 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4458 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 4459 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4460 for (i=0;i<m;i++) { 4461 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4462 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4463 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4464 } 4465 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4466 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4467 4468 ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr); 4469 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4470 ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr); 4471 sprintf(name,"%s.%d",outfile,rank); 4472 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4473 ierr = PetscFree(name); 4474 ierr = MatView(B,out);CHKERRQ(ierr); 4475 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4476 ierr = MatDestroy(&B);CHKERRQ(ierr); 4477 PetscFunctionReturn(0); 4478 } 4479 4480 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 4481 #undef __FUNCT__ 4482 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 4483 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4484 { 4485 PetscErrorCode ierr; 4486 Mat_Merge_SeqsToMPI *merge; 4487 PetscContainer container; 4488 4489 PetscFunctionBegin; 4490 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4491 if (container) { 4492 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4493 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4494 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4495 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4496 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4497 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4498 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4499 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4500 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4501 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4502 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4503 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4504 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4505 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4506 ierr = PetscFree(merge);CHKERRQ(ierr); 4507 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4508 } 4509 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4510 PetscFunctionReturn(0); 4511 } 4512 4513 #include <../src/mat/utils/freespace.h> 4514 #include <petscbt.h> 4515 4516 #undef __FUNCT__ 4517 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 4518 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4519 { 4520 PetscErrorCode ierr; 4521 MPI_Comm comm=((PetscObject)mpimat)->comm; 4522 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4523 PetscMPIInt size,rank,taga,*len_s; 4524 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j; 4525 PetscInt proc,m; 4526 PetscInt **buf_ri,**buf_rj; 4527 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4528 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4529 MPI_Request *s_waits,*r_waits; 4530 MPI_Status *status; 4531 MatScalar *aa=a->a; 4532 MatScalar **abuf_r,*ba_i; 4533 Mat_Merge_SeqsToMPI *merge; 4534 PetscContainer container; 4535 4536 PetscFunctionBegin; 4537 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4538 4539 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4540 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4541 4542 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr); 4543 ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr); 4544 4545 bi = merge->bi; 4546 bj = merge->bj; 4547 buf_ri = merge->buf_ri; 4548 buf_rj = merge->buf_rj; 4549 4550 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4551 owners = merge->rowmap->range; 4552 len_s = merge->len_s; 4553 4554 /* send and recv matrix values */ 4555 /*-----------------------------*/ 4556 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4557 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4558 4559 ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr); 4560 for (proc=0,k=0; proc<size; proc++){ 4561 if (!len_s[proc]) continue; 4562 i = owners[proc]; 4563 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4564 k++; 4565 } 4566 4567 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4568 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4569 ierr = PetscFree(status);CHKERRQ(ierr); 4570 4571 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4572 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4573 4574 /* insert mat values of mpimat */ 4575 /*----------------------------*/ 4576 ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr); 4577 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4578 4579 for (k=0; k<merge->nrecv; k++){ 4580 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4581 nrows = *(buf_ri_k[k]); 4582 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4583 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4584 } 4585 4586 /* set values of ba */ 4587 m = merge->rowmap->n; 4588 for (i=0; i<m; i++) { 4589 arow = owners[rank] + i; 4590 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4591 bnzi = bi[i+1] - bi[i]; 4592 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4593 4594 /* add local non-zero vals of this proc's seqmat into ba */ 4595 anzi = ai[arow+1] - ai[arow]; 4596 aj = a->j + ai[arow]; 4597 aa = a->a + ai[arow]; 4598 nextaj = 0; 4599 for (j=0; nextaj<anzi; j++){ 4600 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4601 ba_i[j] += aa[nextaj++]; 4602 } 4603 } 4604 4605 /* add received vals into ba */ 4606 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4607 /* i-th row */ 4608 if (i == *nextrow[k]) { 4609 anzi = *(nextai[k]+1) - *nextai[k]; 4610 aj = buf_rj[k] + *(nextai[k]); 4611 aa = abuf_r[k] + *(nextai[k]); 4612 nextaj = 0; 4613 for (j=0; nextaj<anzi; j++){ 4614 if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */ 4615 ba_i[j] += aa[nextaj++]; 4616 } 4617 } 4618 nextrow[k]++; nextai[k]++; 4619 } 4620 } 4621 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4622 } 4623 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4624 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4625 4626 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4627 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4628 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4629 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4630 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4631 PetscFunctionReturn(0); 4632 } 4633 4634 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4635 4636 #undef __FUNCT__ 4637 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4638 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4639 { 4640 PetscErrorCode ierr; 4641 Mat B_mpi; 4642 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4643 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4644 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4645 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4646 PetscInt len,proc,*dnz,*onz,bs,cbs; 4647 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4648 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4649 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4650 MPI_Status *status; 4651 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 4652 PetscBT lnkbt; 4653 Mat_Merge_SeqsToMPI *merge; 4654 PetscContainer container; 4655 4656 PetscFunctionBegin; 4657 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4658 4659 /* make sure it is a PETSc comm */ 4660 ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr); 4661 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4662 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4663 4664 ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr); 4665 ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr); 4666 4667 /* determine row ownership */ 4668 /*---------------------------------------------------------*/ 4669 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4670 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4671 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4672 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4673 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4674 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr); 4675 ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr); 4676 4677 m = merge->rowmap->n; 4678 M = merge->rowmap->N; 4679 owners = merge->rowmap->range; 4680 4681 /* determine the number of messages to send, their lengths */ 4682 /*---------------------------------------------------------*/ 4683 len_s = merge->len_s; 4684 4685 len = 0; /* length of buf_si[] */ 4686 merge->nsend = 0; 4687 for (proc=0; proc<size; proc++){ 4688 len_si[proc] = 0; 4689 if (proc == rank){ 4690 len_s[proc] = 0; 4691 } else { 4692 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4693 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4694 } 4695 if (len_s[proc]) { 4696 merge->nsend++; 4697 nrows = 0; 4698 for (i=owners[proc]; i<owners[proc+1]; i++){ 4699 if (ai[i+1] > ai[i]) nrows++; 4700 } 4701 len_si[proc] = 2*(nrows+1); 4702 len += len_si[proc]; 4703 } 4704 } 4705 4706 /* determine the number and length of messages to receive for ij-structure */ 4707 /*-------------------------------------------------------------------------*/ 4708 ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4709 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4710 4711 /* post the Irecv of j-structure */ 4712 /*-------------------------------*/ 4713 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4714 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4715 4716 /* post the Isend of j-structure */ 4717 /*--------------------------------*/ 4718 ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr); 4719 4720 for (proc=0, k=0; proc<size; proc++){ 4721 if (!len_s[proc]) continue; 4722 i = owners[proc]; 4723 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4724 k++; 4725 } 4726 4727 /* receives and sends of j-structure are complete */ 4728 /*------------------------------------------------*/ 4729 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4730 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4731 4732 /* send and recv i-structure */ 4733 /*---------------------------*/ 4734 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4735 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4736 4737 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr); 4738 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4739 for (proc=0,k=0; proc<size; proc++){ 4740 if (!len_s[proc]) continue; 4741 /* form outgoing message for i-structure: 4742 buf_si[0]: nrows to be sent 4743 [1:nrows]: row index (global) 4744 [nrows+1:2*nrows+1]: i-structure index 4745 */ 4746 /*-------------------------------------------*/ 4747 nrows = len_si[proc]/2 - 1; 4748 buf_si_i = buf_si + nrows+1; 4749 buf_si[0] = nrows; 4750 buf_si_i[0] = 0; 4751 nrows = 0; 4752 for (i=owners[proc]; i<owners[proc+1]; i++){ 4753 anzi = ai[i+1] - ai[i]; 4754 if (anzi) { 4755 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4756 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4757 nrows++; 4758 } 4759 } 4760 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4761 k++; 4762 buf_si += len_si[proc]; 4763 } 4764 4765 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4766 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4767 4768 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4769 for (i=0; i<merge->nrecv; i++){ 4770 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4771 } 4772 4773 ierr = PetscFree(len_si);CHKERRQ(ierr); 4774 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4775 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4776 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4777 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4778 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4779 ierr = PetscFree(status);CHKERRQ(ierr); 4780 4781 /* compute a local seq matrix in each processor */ 4782 /*----------------------------------------------*/ 4783 /* allocate bi array and free space for accumulating nonzero column info */ 4784 ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 4785 bi[0] = 0; 4786 4787 /* create and initialize a linked list */ 4788 nlnk = N+1; 4789 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4790 4791 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4792 len = 0; 4793 len = ai[owners[rank+1]] - ai[owners[rank]]; 4794 ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr); 4795 current_space = free_space; 4796 4797 /* determine symbolic info for each local row */ 4798 ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr); 4799 4800 for (k=0; k<merge->nrecv; k++){ 4801 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4802 nrows = *buf_ri_k[k]; 4803 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4804 nextai[k] = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure */ 4805 } 4806 4807 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4808 len = 0; 4809 for (i=0;i<m;i++) { 4810 bnzi = 0; 4811 /* add local non-zero cols of this proc's seqmat into lnk */ 4812 arow = owners[rank] + i; 4813 anzi = ai[arow+1] - ai[arow]; 4814 aj = a->j + ai[arow]; 4815 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4816 bnzi += nlnk; 4817 /* add received col data into lnk */ 4818 for (k=0; k<merge->nrecv; k++){ /* k-th received message */ 4819 if (i == *nextrow[k]) { /* i-th row */ 4820 anzi = *(nextai[k]+1) - *nextai[k]; 4821 aj = buf_rj[k] + *nextai[k]; 4822 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4823 bnzi += nlnk; 4824 nextrow[k]++; nextai[k]++; 4825 } 4826 } 4827 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4828 4829 /* if free space is not available, make more free space */ 4830 if (current_space->local_remaining<bnzi) { 4831 ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,¤t_space);CHKERRQ(ierr); 4832 nspacedouble++; 4833 } 4834 /* copy data into free space, then initialize lnk */ 4835 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4836 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4837 4838 current_space->array += bnzi; 4839 current_space->local_used += bnzi; 4840 current_space->local_remaining -= bnzi; 4841 4842 bi[i+1] = bi[i] + bnzi; 4843 } 4844 4845 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4846 4847 ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 4848 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4849 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4850 4851 /* create symbolic parallel matrix B_mpi */ 4852 /*---------------------------------------*/ 4853 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4854 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4855 if (n==PETSC_DECIDE) { 4856 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4857 } else { 4858 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4859 } 4860 ierr = MatSetBlockSizes(B_mpi,bs,cbs); CHKERRQ(ierr); 4861 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4862 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4863 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4864 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4865 4866 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4867 B_mpi->assembled = PETSC_FALSE; 4868 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4869 merge->bi = bi; 4870 merge->bj = bj; 4871 merge->buf_ri = buf_ri; 4872 merge->buf_rj = buf_rj; 4873 merge->coi = PETSC_NULL; 4874 merge->coj = PETSC_NULL; 4875 merge->owners_co = PETSC_NULL; 4876 4877 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4878 4879 /* attach the supporting struct to B_mpi for reuse */ 4880 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4881 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4882 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4883 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4884 *mpimat = B_mpi; 4885 4886 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4887 PetscFunctionReturn(0); 4888 } 4889 4890 #undef __FUNCT__ 4891 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4892 /*@C 4893 MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential 4894 matrices from each processor 4895 4896 Collective on MPI_Comm 4897 4898 Input Parameters: 4899 + comm - the communicators the parallel matrix will live on 4900 . seqmat - the input sequential matrices 4901 . m - number of local rows (or PETSC_DECIDE) 4902 . n - number of local columns (or PETSC_DECIDE) 4903 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4904 4905 Output Parameter: 4906 . mpimat - the parallel matrix generated 4907 4908 Level: advanced 4909 4910 Notes: 4911 The dimensions of the sequential matrix in each processor MUST be the same. 4912 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4913 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4914 @*/ 4915 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4916 { 4917 PetscErrorCode ierr; 4918 PetscMPIInt size; 4919 4920 PetscFunctionBegin; 4921 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4922 if (size == 1){ 4923 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4924 if (scall == MAT_INITIAL_MATRIX){ 4925 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4926 } else { 4927 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4928 } 4929 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4930 PetscFunctionReturn(0); 4931 } 4932 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4933 if (scall == MAT_INITIAL_MATRIX){ 4934 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4935 } 4936 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4937 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4938 PetscFunctionReturn(0); 4939 } 4940 4941 #undef __FUNCT__ 4942 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4943 /*@ 4944 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with 4945 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4946 with MatGetSize() 4947 4948 Not Collective 4949 4950 Input Parameters: 4951 + A - the matrix 4952 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4953 4954 Output Parameter: 4955 . A_loc - the local sequential matrix generated 4956 4957 Level: developer 4958 4959 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed() 4960 4961 @*/ 4962 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4963 { 4964 PetscErrorCode ierr; 4965 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4966 Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data; 4967 PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; 4968 MatScalar *aa=a->a,*ba=b->a,*cam; 4969 PetscScalar *ca; 4970 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4971 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4972 PetscBool match; 4973 4974 PetscFunctionBegin; 4975 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4976 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 4977 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4978 if (scall == MAT_INITIAL_MATRIX){ 4979 ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 4980 ci[0] = 0; 4981 for (i=0; i<am; i++){ 4982 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4983 } 4984 ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr); 4985 ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr); 4986 k = 0; 4987 for (i=0; i<am; i++) { 4988 ncols_o = bi[i+1] - bi[i]; 4989 ncols_d = ai[i+1] - ai[i]; 4990 /* off-diagonal portion of A */ 4991 for (jo=0; jo<ncols_o; jo++) { 4992 col = cmap[*bj]; 4993 if (col >= cstart) break; 4994 cj[k] = col; bj++; 4995 ca[k++] = *ba++; 4996 } 4997 /* diagonal portion of A */ 4998 for (j=0; j<ncols_d; j++) { 4999 cj[k] = cstart + *aj++; 5000 ca[k++] = *aa++; 5001 } 5002 /* off-diagonal portion of A */ 5003 for (j=jo; j<ncols_o; j++) { 5004 cj[k] = cmap[*bj++]; 5005 ca[k++] = *ba++; 5006 } 5007 } 5008 /* put together the new matrix */ 5009 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5010 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5011 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5012 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5013 mat->free_a = PETSC_TRUE; 5014 mat->free_ij = PETSC_TRUE; 5015 mat->nonew = 0; 5016 } else if (scall == MAT_REUSE_MATRIX){ 5017 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5018 ci = mat->i; cj = mat->j; cam = mat->a; 5019 for (i=0; i<am; i++) { 5020 /* off-diagonal portion of A */ 5021 ncols_o = bi[i+1] - bi[i]; 5022 for (jo=0; jo<ncols_o; jo++) { 5023 col = cmap[*bj]; 5024 if (col >= cstart) break; 5025 *cam++ = *ba++; bj++; 5026 } 5027 /* diagonal portion of A */ 5028 ncols_d = ai[i+1] - ai[i]; 5029 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5030 /* off-diagonal portion of A */ 5031 for (j=jo; j<ncols_o; j++) { 5032 *cam++ = *ba++; bj++; 5033 } 5034 } 5035 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5036 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5037 PetscFunctionReturn(0); 5038 } 5039 5040 #undef __FUNCT__ 5041 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 5042 /*@C 5043 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns 5044 5045 Not Collective 5046 5047 Input Parameters: 5048 + A - the matrix 5049 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5050 - row, col - index sets of rows and columns to extract (or PETSC_NULL) 5051 5052 Output Parameter: 5053 . A_loc - the local sequential matrix generated 5054 5055 Level: developer 5056 5057 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5058 5059 @*/ 5060 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5061 { 5062 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5063 PetscErrorCode ierr; 5064 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5065 IS isrowa,iscola; 5066 Mat *aloc; 5067 PetscBool match; 5068 5069 PetscFunctionBegin; 5070 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5071 if (!match) SETERRQ(((PetscObject)A)->comm, PETSC_ERR_SUP,"Requires MPIAIJ matrix as input"); 5072 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5073 if (!row){ 5074 start = A->rmap->rstart; end = A->rmap->rend; 5075 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5076 } else { 5077 isrowa = *row; 5078 } 5079 if (!col){ 5080 start = A->cmap->rstart; 5081 cmap = a->garray; 5082 nzA = a->A->cmap->n; 5083 nzB = a->B->cmap->n; 5084 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5085 ncols = 0; 5086 for (i=0; i<nzB; i++) { 5087 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5088 else break; 5089 } 5090 imark = i; 5091 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5092 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5093 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5094 } else { 5095 iscola = *col; 5096 } 5097 if (scall != MAT_INITIAL_MATRIX){ 5098 ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr); 5099 aloc[0] = *A_loc; 5100 } 5101 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5102 *A_loc = aloc[0]; 5103 ierr = PetscFree(aloc);CHKERRQ(ierr); 5104 if (!row){ 5105 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5106 } 5107 if (!col){ 5108 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5109 } 5110 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5111 PetscFunctionReturn(0); 5112 } 5113 5114 #undef __FUNCT__ 5115 #define __FUNCT__ "MatGetBrowsOfAcols" 5116 /*@C 5117 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5118 5119 Collective on Mat 5120 5121 Input Parameters: 5122 + A,B - the matrices in mpiaij format 5123 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5124 - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL) 5125 5126 Output Parameter: 5127 + rowb, colb - index sets of rows and columns of B to extract 5128 - B_seq - the sequential matrix generated 5129 5130 Level: developer 5131 5132 @*/ 5133 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5134 { 5135 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5136 PetscErrorCode ierr; 5137 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5138 IS isrowb,iscolb; 5139 Mat *bseq=PETSC_NULL; 5140 5141 PetscFunctionBegin; 5142 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5143 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5144 } 5145 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5146 5147 if (scall == MAT_INITIAL_MATRIX){ 5148 start = A->cmap->rstart; 5149 cmap = a->garray; 5150 nzA = a->A->cmap->n; 5151 nzB = a->B->cmap->n; 5152 ierr = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr); 5153 ncols = 0; 5154 for (i=0; i<nzB; i++) { /* row < local row index */ 5155 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5156 else break; 5157 } 5158 imark = i; 5159 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5160 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5161 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5162 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5163 } else { 5164 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5165 isrowb = *rowb; iscolb = *colb; 5166 ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr); 5167 bseq[0] = *B_seq; 5168 } 5169 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5170 *B_seq = bseq[0]; 5171 ierr = PetscFree(bseq);CHKERRQ(ierr); 5172 if (!rowb){ 5173 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5174 } else { 5175 *rowb = isrowb; 5176 } 5177 if (!colb){ 5178 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5179 } else { 5180 *colb = iscolb; 5181 } 5182 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5183 PetscFunctionReturn(0); 5184 } 5185 5186 #undef __FUNCT__ 5187 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 5188 /* 5189 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5190 of the OFF-DIAGONAL portion of local A 5191 5192 Collective on Mat 5193 5194 Input Parameters: 5195 + A,B - the matrices in mpiaij format 5196 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5197 5198 Output Parameter: 5199 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5200 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL) 5201 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL) 5202 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5203 5204 Level: developer 5205 5206 */ 5207 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5208 { 5209 VecScatter_MPI_General *gen_to,*gen_from; 5210 PetscErrorCode ierr; 5211 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5212 Mat_SeqAIJ *b_oth; 5213 VecScatter ctx=a->Mvctx; 5214 MPI_Comm comm=((PetscObject)ctx)->comm; 5215 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5216 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5217 PetscScalar *rvalues,*svalues; 5218 MatScalar *b_otha,*bufa,*bufA; 5219 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5220 MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL; 5221 MPI_Status *sstatus,rstatus; 5222 PetscMPIInt jj; 5223 PetscInt *cols,sbs,rbs; 5224 PetscScalar *vals; 5225 5226 PetscFunctionBegin; 5227 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){ 5228 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5229 } 5230 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5231 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5232 5233 gen_to = (VecScatter_MPI_General*)ctx->todata; 5234 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5235 rvalues = gen_from->values; /* holds the length of receiving row */ 5236 svalues = gen_to->values; /* holds the length of sending row */ 5237 nrecvs = gen_from->n; 5238 nsends = gen_to->n; 5239 5240 ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr); 5241 srow = gen_to->indices; /* local row index to be sent */ 5242 sstarts = gen_to->starts; 5243 sprocs = gen_to->procs; 5244 sstatus = gen_to->sstatus; 5245 sbs = gen_to->bs; 5246 rstarts = gen_from->starts; 5247 rprocs = gen_from->procs; 5248 rbs = gen_from->bs; 5249 5250 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5251 if (scall == MAT_INITIAL_MATRIX){ 5252 /* i-array */ 5253 /*---------*/ 5254 /* post receives */ 5255 for (i=0; i<nrecvs; i++){ 5256 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5257 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5258 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5259 } 5260 5261 /* pack the outgoing message */ 5262 ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr); 5263 sstartsj[0] = 0; rstartsj[0] = 0; 5264 len = 0; /* total length of j or a array to be sent */ 5265 k = 0; 5266 for (i=0; i<nsends; i++){ 5267 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 5268 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5269 for (j=0; j<nrows; j++) { 5270 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5271 for (l=0; l<sbs; l++){ 5272 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */ 5273 rowlen[j*sbs+l] = ncols; 5274 len += ncols; 5275 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); 5276 } 5277 k++; 5278 } 5279 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5280 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5281 } 5282 /* recvs and sends of i-array are completed */ 5283 i = nrecvs; 5284 while (i--) { 5285 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5286 } 5287 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5288 5289 /* allocate buffers for sending j and a arrays */ 5290 ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr); 5291 ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr); 5292 5293 /* create i-array of B_oth */ 5294 ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr); 5295 b_othi[0] = 0; 5296 len = 0; /* total length of j or a array to be received */ 5297 k = 0; 5298 for (i=0; i<nrecvs; i++){ 5299 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 5300 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */ 5301 for (j=0; j<nrows; j++) { 5302 b_othi[k+1] = b_othi[k] + rowlen[j]; 5303 len += rowlen[j]; k++; 5304 } 5305 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5306 } 5307 5308 /* allocate space for j and a arrrays of B_oth */ 5309 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr); 5310 ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr); 5311 5312 /* j-array */ 5313 /*---------*/ 5314 /* post receives of j-array */ 5315 for (i=0; i<nrecvs; i++){ 5316 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5317 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5318 } 5319 5320 /* pack the outgoing message j-array */ 5321 k = 0; 5322 for (i=0; i<nsends; i++){ 5323 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5324 bufJ = bufj+sstartsj[i]; 5325 for (j=0; j<nrows; j++) { 5326 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5327 for (ll=0; ll<sbs; ll++){ 5328 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5329 for (l=0; l<ncols; l++){ 5330 *bufJ++ = cols[l]; 5331 } 5332 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr); 5333 } 5334 } 5335 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5336 } 5337 5338 /* recvs and sends of j-array are completed */ 5339 i = nrecvs; 5340 while (i--) { 5341 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5342 } 5343 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5344 } else if (scall == MAT_REUSE_MATRIX){ 5345 sstartsj = *startsj_s; 5346 rstartsj = *startsj_r; 5347 bufa = *bufa_ptr; 5348 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5349 b_otha = b_oth->a; 5350 } else { 5351 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5352 } 5353 5354 /* a-array */ 5355 /*---------*/ 5356 /* post receives of a-array */ 5357 for (i=0; i<nrecvs; i++){ 5358 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5359 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5360 } 5361 5362 /* pack the outgoing message a-array */ 5363 k = 0; 5364 for (i=0; i<nsends; i++){ 5365 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5366 bufA = bufa+sstartsj[i]; 5367 for (j=0; j<nrows; j++) { 5368 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5369 for (ll=0; ll<sbs; ll++){ 5370 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5371 for (l=0; l<ncols; l++){ 5372 *bufA++ = vals[l]; 5373 } 5374 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr); 5375 } 5376 } 5377 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5378 } 5379 /* recvs and sends of a-array are completed */ 5380 i = nrecvs; 5381 while (i--) { 5382 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5383 } 5384 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5385 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5386 5387 if (scall == MAT_INITIAL_MATRIX){ 5388 /* put together the new matrix */ 5389 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5390 5391 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5392 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5393 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5394 b_oth->free_a = PETSC_TRUE; 5395 b_oth->free_ij = PETSC_TRUE; 5396 b_oth->nonew = 0; 5397 5398 ierr = PetscFree(bufj);CHKERRQ(ierr); 5399 if (!startsj_s || !bufa_ptr){ 5400 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5401 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5402 } else { 5403 *startsj_s = sstartsj; 5404 *startsj_r = rstartsj; 5405 *bufa_ptr = bufa; 5406 } 5407 } 5408 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5409 PetscFunctionReturn(0); 5410 } 5411 5412 #undef __FUNCT__ 5413 #define __FUNCT__ "MatGetCommunicationStructs" 5414 /*@C 5415 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5416 5417 Not Collective 5418 5419 Input Parameters: 5420 . A - The matrix in mpiaij format 5421 5422 Output Parameter: 5423 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5424 . colmap - A map from global column index to local index into lvec 5425 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5426 5427 Level: developer 5428 5429 @*/ 5430 #if defined (PETSC_USE_CTABLE) 5431 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5432 #else 5433 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5434 #endif 5435 { 5436 Mat_MPIAIJ *a; 5437 5438 PetscFunctionBegin; 5439 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5440 PetscValidPointer(lvec, 2); 5441 PetscValidPointer(colmap, 3); 5442 PetscValidPointer(multScatter, 4); 5443 a = (Mat_MPIAIJ *) A->data; 5444 if (lvec) *lvec = a->lvec; 5445 if (colmap) *colmap = a->colmap; 5446 if (multScatter) *multScatter = a->Mvctx; 5447 PetscFunctionReturn(0); 5448 } 5449 5450 EXTERN_C_BEGIN 5451 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*); 5452 extern PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*); 5453 extern PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*); 5454 EXTERN_C_END 5455 5456 #undef __FUNCT__ 5457 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 5458 /* 5459 Computes (B'*A')' since computing B*A directly is untenable 5460 5461 n p p 5462 ( ) ( ) ( ) 5463 m ( A ) * n ( B ) = m ( C ) 5464 ( ) ( ) ( ) 5465 5466 */ 5467 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5468 { 5469 PetscErrorCode ierr; 5470 Mat At,Bt,Ct; 5471 5472 PetscFunctionBegin; 5473 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5474 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5475 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5476 ierr = MatDestroy(&At);CHKERRQ(ierr); 5477 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5478 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5479 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5480 PetscFunctionReturn(0); 5481 } 5482 5483 #undef __FUNCT__ 5484 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 5485 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5486 { 5487 PetscErrorCode ierr; 5488 PetscInt m=A->rmap->n,n=B->cmap->n; 5489 Mat Cmat; 5490 5491 PetscFunctionBegin; 5492 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5493 ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr); 5494 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5495 ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr); 5496 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5497 ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr); 5498 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5499 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5500 *C = Cmat; 5501 (*C)->ops->matmult = MatMatMult_MPIDense_MPIAIJ; 5502 PetscFunctionReturn(0); 5503 } 5504 5505 /* ----------------------------------------------------------------*/ 5506 #undef __FUNCT__ 5507 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 5508 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5509 { 5510 PetscErrorCode ierr; 5511 5512 PetscFunctionBegin; 5513 if (scall == MAT_INITIAL_MATRIX){ 5514 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5515 } 5516 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5517 PetscFunctionReturn(0); 5518 } 5519 5520 EXTERN_C_BEGIN 5521 #if defined(PETSC_HAVE_MUMPS) 5522 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*); 5523 #endif 5524 #if defined(PETSC_HAVE_PASTIX) 5525 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*); 5526 #endif 5527 #if defined(PETSC_HAVE_SUPERLU_DIST) 5528 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*); 5529 #endif 5530 #if defined(PETSC_HAVE_SPOOLES) 5531 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*); 5532 #endif 5533 EXTERN_C_END 5534 5535 /*MC 5536 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5537 5538 Options Database Keys: 5539 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5540 5541 Level: beginner 5542 5543 .seealso: MatCreateAIJ() 5544 M*/ 5545 5546 EXTERN_C_BEGIN 5547 #undef __FUNCT__ 5548 #define __FUNCT__ "MatCreate_MPIAIJ" 5549 PetscErrorCode MatCreate_MPIAIJ(Mat B) 5550 { 5551 Mat_MPIAIJ *b; 5552 PetscErrorCode ierr; 5553 PetscMPIInt size; 5554 5555 PetscFunctionBegin; 5556 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 5557 ierr = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr); 5558 B->data = (void*)b; 5559 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5560 B->assembled = PETSC_FALSE; 5561 B->insertmode = NOT_SET_VALUES; 5562 b->size = size; 5563 ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr); 5564 5565 /* build cache for off array entries formed */ 5566 ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr); 5567 b->donotstash = PETSC_FALSE; 5568 b->colmap = 0; 5569 b->garray = 0; 5570 b->roworiented = PETSC_TRUE; 5571 5572 /* stuff used for matrix vector multiply */ 5573 b->lvec = PETSC_NULL; 5574 b->Mvctx = PETSC_NULL; 5575 5576 /* stuff for MatGetRow() */ 5577 b->rowindices = 0; 5578 b->rowvalues = 0; 5579 b->getrowactive = PETSC_FALSE; 5580 5581 /* flexible pointer used in CUSP/CUSPARSE classes */ 5582 b->spptr = PETSC_NULL; 5583 #if defined(PETSC_HAVE_SPOOLES) 5584 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C", 5585 "MatGetFactor_mpiaij_spooles", 5586 MatGetFactor_mpiaij_spooles);CHKERRQ(ierr); 5587 #endif 5588 #if defined(PETSC_HAVE_MUMPS) 5589 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", 5590 "MatGetFactor_aij_mumps", 5591 MatGetFactor_aij_mumps);CHKERRQ(ierr); 5592 #endif 5593 #if defined(PETSC_HAVE_PASTIX) 5594 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C", 5595 "MatGetFactor_mpiaij_pastix", 5596 MatGetFactor_mpiaij_pastix);CHKERRQ(ierr); 5597 #endif 5598 #if defined(PETSC_HAVE_SUPERLU_DIST) 5599 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C", 5600 "MatGetFactor_mpiaij_superlu_dist", 5601 MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr); 5602 #endif 5603 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 5604 "MatStoreValues_MPIAIJ", 5605 MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5606 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 5607 "MatRetrieveValues_MPIAIJ", 5608 MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5609 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 5610 "MatGetDiagonalBlock_MPIAIJ", 5611 MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr); 5612 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C", 5613 "MatIsTranspose_MPIAIJ", 5614 MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5615 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C", 5616 "MatMPIAIJSetPreallocation_MPIAIJ", 5617 MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5618 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C", 5619 "MatMPIAIJSetPreallocationCSR_MPIAIJ", 5620 MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5621 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C", 5622 "MatDiagonalScaleLocal_MPIAIJ", 5623 MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5624 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C", 5625 "MatConvert_MPIAIJ_MPIAIJPERM", 5626 MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5627 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C", 5628 "MatConvert_MPIAIJ_MPIAIJCRL", 5629 MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5630 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C", 5631 "MatConvert_MPIAIJ_MPISBAIJ", 5632 MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5633 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C", 5634 "MatMatMult_MPIDense_MPIAIJ", 5635 MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5636 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C", 5637 "MatMatMultSymbolic_MPIDense_MPIAIJ", 5638 MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5639 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C", 5640 "MatMatMultNumeric_MPIDense_MPIAIJ", 5641 MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5642 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5643 PetscFunctionReturn(0); 5644 } 5645 EXTERN_C_END 5646 5647 #undef __FUNCT__ 5648 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5649 /*@ 5650 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5651 and "off-diagonal" part of the matrix in CSR format. 5652 5653 Collective on MPI_Comm 5654 5655 Input Parameters: 5656 + comm - MPI communicator 5657 . m - number of local rows (Cannot be PETSC_DECIDE) 5658 . n - This value should be the same as the local size used in creating the 5659 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5660 calculated if N is given) For square matrices n is almost always m. 5661 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5662 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5663 . i - row indices for "diagonal" portion of matrix 5664 . j - column indices 5665 . a - matrix values 5666 . oi - row indices for "off-diagonal" portion of matrix 5667 . oj - column indices 5668 - oa - matrix values 5669 5670 Output Parameter: 5671 . mat - the matrix 5672 5673 Level: advanced 5674 5675 Notes: 5676 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5677 must free the arrays once the matrix has been destroyed and not before. 5678 5679 The i and j indices are 0 based 5680 5681 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5682 5683 This sets local rows and cannot be used to set off-processor values. 5684 5685 You cannot later use MatSetValues() to change values in this matrix. 5686 5687 .keywords: matrix, aij, compressed row, sparse, parallel 5688 5689 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5690 MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5691 @*/ 5692 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[], 5693 PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5694 { 5695 PetscErrorCode ierr; 5696 Mat_MPIAIJ *maij; 5697 5698 PetscFunctionBegin; 5699 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5700 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5701 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5702 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5703 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5704 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5705 maij = (Mat_MPIAIJ*) (*mat)->data; 5706 maij->donotstash = PETSC_TRUE; 5707 (*mat)->preallocated = PETSC_TRUE; 5708 5709 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5710 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5711 5712 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5713 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5714 5715 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5716 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5717 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5718 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5719 5720 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5721 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5722 PetscFunctionReturn(0); 5723 } 5724 5725 /* 5726 Special version for direct calls from Fortran 5727 */ 5728 #include <petsc-private/fortranimpl.h> 5729 5730 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5731 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5732 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5733 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5734 #endif 5735 5736 /* Change these macros so can be used in void function */ 5737 #undef CHKERRQ 5738 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5739 #undef SETERRQ2 5740 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5741 #undef SETERRQ3 5742 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5743 #undef SETERRQ 5744 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5745 5746 EXTERN_C_BEGIN 5747 #undef __FUNCT__ 5748 #define __FUNCT__ "matsetvaluesmpiaij_" 5749 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5750 { 5751 Mat mat = *mmat; 5752 PetscInt m = *mm, n = *mn; 5753 InsertMode addv = *maddv; 5754 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5755 PetscScalar value; 5756 PetscErrorCode ierr; 5757 5758 MatCheckPreallocated(mat,1); 5759 if (mat->insertmode == NOT_SET_VALUES) { 5760 mat->insertmode = addv; 5761 } 5762 #if defined(PETSC_USE_DEBUG) 5763 else if (mat->insertmode != addv) { 5764 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5765 } 5766 #endif 5767 { 5768 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5769 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5770 PetscBool roworiented = aij->roworiented; 5771 5772 /* Some Variables required in the macro */ 5773 Mat A = aij->A; 5774 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5775 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5776 MatScalar *aa = a->a; 5777 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE); 5778 Mat B = aij->B; 5779 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5780 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5781 MatScalar *ba = b->a; 5782 5783 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5784 PetscInt nonew = a->nonew; 5785 MatScalar *ap1,*ap2; 5786 5787 PetscFunctionBegin; 5788 for (i=0; i<m; i++) { 5789 if (im[i] < 0) continue; 5790 #if defined(PETSC_USE_DEBUG) 5791 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5792 #endif 5793 if (im[i] >= rstart && im[i] < rend) { 5794 row = im[i] - rstart; 5795 lastcol1 = -1; 5796 rp1 = aj + ai[row]; 5797 ap1 = aa + ai[row]; 5798 rmax1 = aimax[row]; 5799 nrow1 = ailen[row]; 5800 low1 = 0; 5801 high1 = nrow1; 5802 lastcol2 = -1; 5803 rp2 = bj + bi[row]; 5804 ap2 = ba + bi[row]; 5805 rmax2 = bimax[row]; 5806 nrow2 = bilen[row]; 5807 low2 = 0; 5808 high2 = nrow2; 5809 5810 for (j=0; j<n; j++) { 5811 if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 5812 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5813 if (in[j] >= cstart && in[j] < cend){ 5814 col = in[j] - cstart; 5815 MatSetValues_SeqAIJ_A_Private(row,col,value,addv); 5816 } else if (in[j] < 0) continue; 5817 #if defined(PETSC_USE_DEBUG) 5818 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5819 #endif 5820 else { 5821 if (mat->was_assembled) { 5822 if (!aij->colmap) { 5823 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5824 } 5825 #if defined (PETSC_USE_CTABLE) 5826 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5827 col--; 5828 #else 5829 col = aij->colmap[in[j]] - 1; 5830 #endif 5831 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5832 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5833 col = in[j]; 5834 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5835 B = aij->B; 5836 b = (Mat_SeqAIJ*)B->data; 5837 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5838 rp2 = bj + bi[row]; 5839 ap2 = ba + bi[row]; 5840 rmax2 = bimax[row]; 5841 nrow2 = bilen[row]; 5842 low2 = 0; 5843 high2 = nrow2; 5844 bm = aij->B->rmap->n; 5845 ba = b->a; 5846 } 5847 } else col = in[j]; 5848 MatSetValues_SeqAIJ_B_Private(row,col,value,addv); 5849 } 5850 } 5851 } else { 5852 if (!aij->donotstash) { 5853 if (roworiented) { 5854 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5855 } else { 5856 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5857 } 5858 } 5859 } 5860 }} 5861 PetscFunctionReturnVoid(); 5862 } 5863 EXTERN_C_END 5864 5865