1 /*$Id: baij.c,v 1.194 1999/11/24 21:53:59 bsmith Exp bsmith $*/ 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "sys.h" 8 #include "src/mat/impls/baij/seq/baij.h" 9 #include "src/vec/vecimpl.h" 10 #include "src/inline/spops.h" 11 12 #define CHUNKSIZE 10 13 14 /* 15 Checks for missing diagonals 16 */ 17 #undef __FUNC__ 18 #define __FUNC__ "MatMissingDiagonal_SeqBAIJ" 19 int MatMissingDiagonal_SeqBAIJ(Mat A) 20 { 21 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 22 int *diag,*jj = a->j,i,ierr; 23 24 PetscFunctionBegin; 25 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 26 diag = a->diag; 27 for (i=0; i<a->mbs; i++) { 28 if (jj[diag[i]] != i) { 29 SETERRQ1(1,1,"Matrix is missing diagonal number %d",i); 30 } 31 } 32 PetscFunctionReturn(0); 33 } 34 35 #undef __FUNC__ 36 #define __FUNC__ "MatMarkDiagonal_SeqBAIJ" 37 int MatMarkDiagonal_SeqBAIJ(Mat A) 38 { 39 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 40 int i,j,*diag,m = a->mbs; 41 42 PetscFunctionBegin; 43 if (a->diag) PetscFunctionReturn(0); 44 45 diag = (int*)PetscMalloc((m+1)*sizeof(int));CHKPTRQ(diag); 46 PLogObjectMemory(A,(m+1)*sizeof(int)); 47 for (i=0; i<m; i++) { 48 diag[i] = a->i[i+1]; 49 for (j=a->i[i]; j<a->i[i+1]; j++) { 50 if (a->j[j] == i) { 51 diag[i] = j; 52 break; 53 } 54 } 55 } 56 a->diag = diag; 57 PetscFunctionReturn(0); 58 } 59 60 61 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 62 63 #undef __FUNC__ 64 #define __FUNC__ "MatGetRowIJ_SeqBAIJ" 65 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 66 { 67 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 68 int ierr,n = a->mbs,i; 69 70 PetscFunctionBegin; 71 *nn = n; 72 if (!ia) PetscFunctionReturn(0); 73 if (symmetric) { 74 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 75 } else if (oshift == 1) { 76 /* temporarily add 1 to i and j indices */ 77 int nz = a->i[n] + 1; 78 for (i=0; i<nz; i++) a->j[i]++; 79 for (i=0; i<n+1; i++) a->i[i]++; 80 *ia = a->i; *ja = a->j; 81 } else { 82 *ia = a->i; *ja = a->j; 83 } 84 85 PetscFunctionReturn(0); 86 } 87 88 #undef __FUNC__ 89 #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ" 90 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 91 PetscTruth *done) 92 { 93 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 94 int i,n = a->mbs,ierr; 95 96 PetscFunctionBegin; 97 if (!ia) PetscFunctionReturn(0); 98 if (symmetric) { 99 ierr = PetscFree(*ia);CHKERRQ(ierr); 100 ierr = PetscFree(*ja);CHKERRQ(ierr); 101 } else if (oshift == 1) { 102 int nz = a->i[n]; 103 for (i=0; i<nz; i++) a->j[i]--; 104 for (i=0; i<n+1; i++) a->i[i]--; 105 } 106 PetscFunctionReturn(0); 107 } 108 109 #undef __FUNC__ 110 #define __FUNC__ "MatGetBlockSize_SeqBAIJ" 111 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs) 112 { 113 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 114 115 PetscFunctionBegin; 116 *bs = baij->bs; 117 PetscFunctionReturn(0); 118 } 119 120 121 #undef __FUNC__ 122 #define __FUNC__ "MatDestroy_SeqBAIJ" 123 int MatDestroy_SeqBAIJ(Mat A) 124 { 125 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 126 int ierr; 127 128 PetscFunctionBegin; 129 if (A->mapping) { 130 ierr = ISLocalToGlobalMappingDestroy(A->mapping);CHKERRQ(ierr); 131 } 132 if (A->bmapping) { 133 ierr = ISLocalToGlobalMappingDestroy(A->bmapping);CHKERRQ(ierr); 134 } 135 if (A->rmap) { 136 ierr = MapDestroy(A->rmap);CHKERRQ(ierr); 137 } 138 if (A->cmap) { 139 ierr = MapDestroy(A->cmap);CHKERRQ(ierr); 140 } 141 #if defined(PETSC_USE_LOG) 142 PLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 143 #endif 144 ierr = PetscFree(a->a);CHKERRQ(ierr); 145 if (!a->singlemalloc) { 146 ierr = PetscFree(a->i);CHKERRQ(ierr); 147 ierr = PetscFree(a->j);CHKERRQ(ierr); 148 } 149 if (a->row) { 150 ierr = ISDestroy(a->row);CHKERRQ(ierr); 151 } 152 if (a->col) { 153 ierr = ISDestroy(a->col);CHKERRQ(ierr); 154 } 155 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 156 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 157 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 158 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 159 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 160 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 161 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 162 ierr = PetscFree(a);CHKERRQ(ierr); 163 PLogObjectDestroy(A); 164 PetscHeaderDestroy(A); 165 PetscFunctionReturn(0); 166 } 167 168 #undef __FUNC__ 169 #define __FUNC__ "MatSetOption_SeqBAIJ" 170 int MatSetOption_SeqBAIJ(Mat A,MatOption op) 171 { 172 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 173 174 PetscFunctionBegin; 175 if (op == MAT_ROW_ORIENTED) a->roworiented = PETSC_TRUE; 176 else if (op == MAT_COLUMN_ORIENTED) a->roworiented = PETSC_FALSE; 177 else if (op == MAT_COLUMNS_SORTED) a->sorted = PETSC_TRUE; 178 else if (op == MAT_COLUMNS_UNSORTED) a->sorted = PETSC_FALSE; 179 else if (op == MAT_KEEP_ZEROED_ROWS) a->keepzeroedrows = PETSC_TRUE; 180 else if (op == MAT_NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 181 else if (op == MAT_NEW_NONZERO_LOCATION_ERR) a->nonew = -1; 182 else if (op == MAT_NEW_NONZERO_ALLOCATION_ERR) a->nonew = -2; 183 else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 184 else if (op == MAT_ROWS_SORTED || 185 op == MAT_ROWS_UNSORTED || 186 op == MAT_SYMMETRIC || 187 op == MAT_STRUCTURALLY_SYMMETRIC || 188 op == MAT_YES_NEW_DIAGONALS || 189 op == MAT_IGNORE_OFF_PROC_ENTRIES || 190 op == MAT_USE_HASH_TABLE) { 191 PLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 192 } else if (op == MAT_NO_NEW_DIAGONALS) { 193 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 194 } else { 195 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 196 } 197 PetscFunctionReturn(0); 198 } 199 200 201 #undef __FUNC__ 202 #define __FUNC__ "MatGetSize_SeqBAIJ" 203 int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 204 { 205 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 206 207 PetscFunctionBegin; 208 if (m) *m = a->m; 209 if (n) *n = a->n; 210 PetscFunctionReturn(0); 211 } 212 213 #undef __FUNC__ 214 #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ" 215 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 216 { 217 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 218 219 PetscFunctionBegin; 220 *m = 0; *n = a->m; 221 PetscFunctionReturn(0); 222 } 223 224 #undef __FUNC__ 225 #define __FUNC__ "MatGetRow_SeqBAIJ" 226 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 227 { 228 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 229 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 230 MatScalar *aa,*aa_i; 231 Scalar *v_i; 232 233 PetscFunctionBegin; 234 bs = a->bs; 235 ai = a->i; 236 aj = a->j; 237 aa = a->a; 238 bs2 = a->bs2; 239 240 if (row < 0 || row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row out of range"); 241 242 bn = row/bs; /* Block number */ 243 bp = row % bs; /* Block Position */ 244 M = ai[bn+1] - ai[bn]; 245 *nz = bs*M; 246 247 if (v) { 248 *v = 0; 249 if (*nz) { 250 *v = (Scalar*)PetscMalloc((*nz)*sizeof(Scalar));CHKPTRQ(*v); 251 for (i=0; i<M; i++) { /* for each block in the block row */ 252 v_i = *v + i*bs; 253 aa_i = aa + bs2*(ai[bn] + i); 254 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 255 } 256 } 257 } 258 259 if (idx) { 260 *idx = 0; 261 if (*nz) { 262 *idx = (int*)PetscMalloc((*nz)*sizeof(int));CHKPTRQ(*idx); 263 for (i=0; i<M; i++) { /* for each block in the block row */ 264 idx_i = *idx + i*bs; 265 itmp = bs*aj[ai[bn] + i]; 266 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 267 } 268 } 269 } 270 PetscFunctionReturn(0); 271 } 272 273 #undef __FUNC__ 274 #define __FUNC__ "MatRestoreRow_SeqBAIJ" 275 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 276 { 277 int ierr; 278 279 PetscFunctionBegin; 280 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 281 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 282 PetscFunctionReturn(0); 283 } 284 285 #undef __FUNC__ 286 #define __FUNC__ "MatTranspose_SeqBAIJ" 287 int MatTranspose_SeqBAIJ(Mat A,Mat *B) 288 { 289 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 290 Mat C; 291 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 292 int *rows,*cols,bs2=a->bs2,refct; 293 MatScalar *array = a->a; 294 295 PetscFunctionBegin; 296 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Square matrix only for in-place"); 297 col = (int*)PetscMalloc((1+nbs)*sizeof(int));CHKPTRQ(col); 298 ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr); 299 300 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 301 ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C);CHKERRQ(ierr); 302 ierr = PetscFree(col);CHKERRQ(ierr); 303 rows = (int*)PetscMalloc(2*bs*sizeof(int));CHKPTRQ(rows); 304 cols = rows + bs; 305 for (i=0; i<mbs; i++) { 306 cols[0] = i*bs; 307 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 308 len = ai[i+1] - ai[i]; 309 for (j=0; j<len; j++) { 310 rows[0] = (*aj++)*bs; 311 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 312 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 313 array += bs2; 314 } 315 } 316 ierr = PetscFree(rows);CHKERRQ(ierr); 317 318 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 319 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 320 321 if (B) { 322 *B = C; 323 } else { 324 PetscOps *Abops; 325 MatOps Aops; 326 327 /* This isn't really an in-place transpose */ 328 ierr = PetscFree(a->a);CHKERRQ(ierr); 329 if (!a->singlemalloc) { 330 ierr = PetscFree(a->i);CHKERRQ(ierr); 331 ierr = PetscFree(a->j);CHKERRQ(ierr); 332 } 333 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 334 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 335 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 336 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 337 ierr = PetscFree(a);CHKERRQ(ierr); 338 339 340 ierr = MapDestroy(A->rmap);CHKERRQ(ierr); 341 ierr = MapDestroy(A->cmap);CHKERRQ(ierr); 342 343 /* 344 This is horrible, horrible code. We need to keep the 345 A pointers for the bops and ops but copy everything 346 else from C. 347 */ 348 Abops = A->bops; 349 Aops = A->ops; 350 refct = A->refct; 351 ierr = PetscMemcpy(A,C,sizeof(struct _p_Mat));CHKERRQ(ierr); 352 A->bops = Abops; 353 A->ops = Aops; 354 A->qlist = 0; 355 A->refct = refct; 356 357 PetscHeaderDestroy(C); 358 } 359 PetscFunctionReturn(0); 360 } 361 362 #undef __FUNC__ 363 #define __FUNC__ "MatView_SeqBAIJ_Binary" 364 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 365 { 366 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 367 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 368 Scalar *aa; 369 FILE *file; 370 371 PetscFunctionBegin; 372 ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 373 col_lens = (int*)PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 374 col_lens[0] = MAT_COOKIE; 375 376 col_lens[1] = a->m; 377 col_lens[2] = a->n; 378 col_lens[3] = a->nz*bs2; 379 380 /* store lengths of each row and write (including header) to file */ 381 count = 0; 382 for (i=0; i<a->mbs; i++) { 383 for (j=0; j<bs; j++) { 384 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 385 } 386 } 387 ierr = PetscBinaryWrite(fd,col_lens,4+a->m,PETSC_INT,1);CHKERRQ(ierr); 388 ierr = PetscFree(col_lens);CHKERRQ(ierr); 389 390 /* store column indices (zero start index) */ 391 jj = (int*)PetscMalloc((a->nz+1)*bs2*sizeof(int));CHKPTRQ(jj); 392 count = 0; 393 for (i=0; i<a->mbs; i++) { 394 for (j=0; j<bs; j++) { 395 for (k=a->i[i]; k<a->i[i+1]; k++) { 396 for (l=0; l<bs; l++) { 397 jj[count++] = bs*a->j[k] + l; 398 } 399 } 400 } 401 } 402 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr); 403 ierr = PetscFree(jj);CHKERRQ(ierr); 404 405 /* store nonzero values */ 406 aa = (Scalar*)PetscMalloc((a->nz+1)*bs2*sizeof(Scalar));CHKPTRQ(aa); 407 count = 0; 408 for (i=0; i<a->mbs; i++) { 409 for (j=0; j<bs; j++) { 410 for (k=a->i[i]; k<a->i[i+1]; k++) { 411 for (l=0; l<bs; l++) { 412 aa[count++] = a->a[bs2*k + l*bs + j]; 413 } 414 } 415 } 416 } 417 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr); 418 ierr = PetscFree(aa);CHKERRQ(ierr); 419 420 ierr = ViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 421 if (file) { 422 fprintf(file,"-matload_block_size %d\n",a->bs); 423 } 424 PetscFunctionReturn(0); 425 } 426 427 #undef __FUNC__ 428 #define __FUNC__ "MatView_SeqBAIJ_ASCII" 429 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 430 { 431 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 432 int ierr,i,j,format,bs = a->bs,k,l,bs2=a->bs2; 433 char *outputname; 434 435 PetscFunctionBegin; 436 ierr = ViewerGetOutputname(viewer,&outputname);CHKERRQ(ierr); 437 ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr); 438 if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) { 439 ierr = ViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 440 } else if (format == VIEWER_FORMAT_ASCII_MATLAB) { 441 SETERRQ(PETSC_ERR_SUP,0,"Matlab format not supported"); 442 } else if (format == VIEWER_FORMAT_ASCII_COMMON) { 443 ierr = ViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 444 for (i=0; i<a->mbs; i++) { 445 for (j=0; j<bs; j++) { 446 ierr = ViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 447 for (k=a->i[i]; k<a->i[i+1]; k++) { 448 for (l=0; l<bs; l++) { 449 #if defined(PETSC_USE_COMPLEX) 450 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 451 ierr = ViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l, 452 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 453 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 454 ierr = ViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l, 455 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 456 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 457 ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 458 } 459 #else 460 if (a->a[bs2*k + l*bs + j] != 0.0) { 461 ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 462 } 463 #endif 464 } 465 } 466 ierr = ViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 467 } 468 } 469 ierr = ViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 470 } else { 471 ierr = ViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 472 for (i=0; i<a->mbs; i++) { 473 for (j=0; j<bs; j++) { 474 ierr = ViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 475 for (k=a->i[i]; k<a->i[i+1]; k++) { 476 for (l=0; l<bs; l++) { 477 #if defined(PETSC_USE_COMPLEX) 478 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 479 ierr = ViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l, 480 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 481 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 482 ierr = ViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l, 483 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 484 } else { 485 ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 486 } 487 #else 488 ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 489 #endif 490 } 491 } 492 ierr = ViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 493 } 494 } 495 ierr = ViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 496 } 497 ierr = ViewerFlush(viewer);CHKERRQ(ierr); 498 PetscFunctionReturn(0); 499 } 500 501 #undef __FUNC__ 502 #define __FUNC__ "MatView_SeqBAIJ_Draw_Zoom" 503 static int MatView_SeqBAIJ_Draw_Zoom(Draw draw,void *Aa) 504 { 505 Mat A = (Mat) Aa; 506 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 507 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank; 508 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 509 MatScalar *aa; 510 MPI_Comm comm; 511 Viewer viewer; 512 513 PetscFunctionBegin; 514 /* 515 This is nasty. If this is called from an originally parallel matrix 516 then all processes call this,but only the first has the matrix so the 517 rest should return immediately. 518 */ 519 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 520 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 521 if (rank) PetscFunctionReturn(0); 522 523 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 524 525 ierr = DrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 526 527 /* loop over matrix elements drawing boxes */ 528 color = DRAW_BLUE; 529 for (i=0,row=0; i<mbs; i++,row+=bs) { 530 for (j=a->i[i]; j<a->i[i+1]; j++) { 531 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 532 x_l = a->j[j]*bs; x_r = x_l + 1.0; 533 aa = a->a + j*bs2; 534 for (k=0; k<bs; k++) { 535 for (l=0; l<bs; l++) { 536 if (PetscRealPart(*aa++) >= 0.) continue; 537 ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 538 } 539 } 540 } 541 } 542 color = DRAW_CYAN; 543 for (i=0,row=0; i<mbs; i++,row+=bs) { 544 for (j=a->i[i]; j<a->i[i+1]; j++) { 545 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 546 x_l = a->j[j]*bs; x_r = x_l + 1.0; 547 aa = a->a + j*bs2; 548 for (k=0; k<bs; k++) { 549 for (l=0; l<bs; l++) { 550 if (PetscRealPart(*aa++) != 0.) continue; 551 ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 552 } 553 } 554 } 555 } 556 557 color = DRAW_RED; 558 for (i=0,row=0; i<mbs; i++,row+=bs) { 559 for (j=a->i[i]; j<a->i[i+1]; j++) { 560 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 561 x_l = a->j[j]*bs; x_r = x_l + 1.0; 562 aa = a->a + j*bs2; 563 for (k=0; k<bs; k++) { 564 for (l=0; l<bs; l++) { 565 if (PetscRealPart(*aa++) <= 0.) continue; 566 ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 567 } 568 } 569 } 570 } 571 PetscFunctionReturn(0); 572 } 573 574 #undef __FUNC__ 575 #define __FUNC__ "MatView_SeqBAIJ_Draw" 576 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer) 577 { 578 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 579 int ierr; 580 PetscReal xl,yl,xr,yr,w,h; 581 Draw draw; 582 PetscTruth isnull; 583 584 PetscFunctionBegin; 585 586 ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 587 ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 588 589 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 590 xr = a->n; yr = a->m; h = yr/10.0; w = xr/10.0; 591 xr += w; yr += h; xl = -w; yl = -h; 592 ierr = DrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 593 ierr = DrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 594 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 595 PetscFunctionReturn(0); 596 } 597 598 #undef __FUNC__ 599 #define __FUNC__ "MatView_SeqBAIJ" 600 int MatView_SeqBAIJ(Mat A,Viewer viewer) 601 { 602 int ierr; 603 PetscTruth issocket,isascii,isbinary,isdraw; 604 605 PetscFunctionBegin; 606 ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr); 607 ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr); 608 ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr); 609 ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr); 610 if (issocket) { 611 SETERRQ(PETSC_ERR_SUP,0,"Socket viewer not supported"); 612 } else if (isascii){ 613 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 614 } else if (isbinary) { 615 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 616 } else if (isdraw) { 617 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 618 } else { 619 SETERRQ1(1,1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name); 620 } 621 PetscFunctionReturn(0); 622 } 623 624 625 #undef __FUNC__ 626 #define __FUNC__ "MatGetValues_SeqBAIJ" 627 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 628 { 629 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 630 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 631 int *ai = a->i,*ailen = a->ilen; 632 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 633 MatScalar *ap,*aa = a->a,zero = 0.0; 634 635 PetscFunctionBegin; 636 for (k=0; k<m; k++) { /* loop over rows */ 637 row = im[k]; brow = row/bs; 638 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 639 if (row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 640 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 641 nrow = ailen[brow]; 642 for (l=0; l<n; l++) { /* loop over columns */ 643 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 644 if (in[l] >= a->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 645 col = in[l] ; 646 bcol = col/bs; 647 cidx = col%bs; 648 ridx = row%bs; 649 high = nrow; 650 low = 0; /* assume unsorted */ 651 while (high-low > 5) { 652 t = (low+high)/2; 653 if (rp[t] > bcol) high = t; 654 else low = t; 655 } 656 for (i=low; i<high; i++) { 657 if (rp[i] > bcol) break; 658 if (rp[i] == bcol) { 659 *v++ = ap[bs2*i+bs*cidx+ridx]; 660 goto finished; 661 } 662 } 663 *v++ = zero; 664 finished:; 665 } 666 } 667 PetscFunctionReturn(0); 668 } 669 670 671 #undef __FUNC__ 672 #define __FUNC__ "MatSetValuesBlocked_SeqBAIJ" 673 int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 674 { 675 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 676 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 677 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 678 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 679 Scalar *value = v; 680 MatScalar *ap,*aa=a->a,*bap; 681 682 PetscFunctionBegin; 683 if (roworiented) { 684 stepval = (n-1)*bs; 685 } else { 686 stepval = (m-1)*bs; 687 } 688 for (k=0; k<m; k++) { /* loop over added rows */ 689 row = im[k]; 690 if (row < 0) continue; 691 #if defined(PETSC_USE_BOPT_g) 692 if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 693 #endif 694 rp = aj + ai[row]; 695 ap = aa + bs2*ai[row]; 696 rmax = imax[row]; 697 nrow = ailen[row]; 698 low = 0; 699 for (l=0; l<n; l++) { /* loop over added columns */ 700 if (in[l] < 0) continue; 701 #if defined(PETSC_USE_BOPT_g) 702 if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 703 #endif 704 col = in[l]; 705 if (roworiented) { 706 value = v + k*(stepval+bs)*bs + l*bs; 707 } else { 708 value = v + l*(stepval+bs)*bs + k*bs; 709 } 710 if (!sorted) low = 0; high = nrow; 711 while (high-low > 7) { 712 t = (low+high)/2; 713 if (rp[t] > col) high = t; 714 else low = t; 715 } 716 for (i=low; i<high; i++) { 717 if (rp[i] > col) break; 718 if (rp[i] == col) { 719 bap = ap + bs2*i; 720 if (roworiented) { 721 if (is == ADD_VALUES) { 722 for (ii=0; ii<bs; ii++,value+=stepval) { 723 for (jj=ii; jj<bs2; jj+=bs) { 724 bap[jj] += *value++; 725 } 726 } 727 } else { 728 for (ii=0; ii<bs; ii++,value+=stepval) { 729 for (jj=ii; jj<bs2; jj+=bs) { 730 bap[jj] = *value++; 731 } 732 } 733 } 734 } else { 735 if (is == ADD_VALUES) { 736 for (ii=0; ii<bs; ii++,value+=stepval) { 737 for (jj=0; jj<bs; jj++) { 738 *bap++ += *value++; 739 } 740 } 741 } else { 742 for (ii=0; ii<bs; ii++,value+=stepval) { 743 for (jj=0; jj<bs; jj++) { 744 *bap++ = *value++; 745 } 746 } 747 } 748 } 749 goto noinsert2; 750 } 751 } 752 if (nonew == 1) goto noinsert2; 753 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 754 if (nrow >= rmax) { 755 /* there is no extra room in row, therefore enlarge */ 756 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 757 MatScalar *new_a; 758 759 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 760 761 /* malloc new storage space */ 762 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 763 new_a = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); 764 new_j = (int*)(new_a + bs2*new_nz); 765 new_i = new_j + new_nz; 766 767 /* copy over old data into new slots */ 768 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 769 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 770 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 771 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 772 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 773 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 774 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 775 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 776 /* free up old matrix storage */ 777 ierr = PetscFree(a->a);CHKERRQ(ierr); 778 if (!a->singlemalloc) { 779 ierr = PetscFree(a->i);CHKERRQ(ierr); 780 ierr = PetscFree(a->j);CHKERRQ(ierr); 781 } 782 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 783 a->singlemalloc = PETSC_TRUE; 784 785 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 786 rmax = imax[row] = imax[row] + CHUNKSIZE; 787 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 788 a->maxnz += bs2*CHUNKSIZE; 789 a->reallocs++; 790 a->nz++; 791 } 792 N = nrow++ - 1; 793 /* shift up all the later entries in this row */ 794 for (ii=N; ii>=i; ii--) { 795 rp[ii+1] = rp[ii]; 796 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 797 } 798 if (N >= i) { 799 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 800 } 801 rp[i] = col; 802 bap = ap + bs2*i; 803 if (roworiented) { 804 for (ii=0; ii<bs; ii++,value+=stepval) { 805 for (jj=ii; jj<bs2; jj+=bs) { 806 bap[jj] = *value++; 807 } 808 } 809 } else { 810 for (ii=0; ii<bs; ii++,value+=stepval) { 811 for (jj=0; jj<bs; jj++) { 812 *bap++ = *value++; 813 } 814 } 815 } 816 noinsert2:; 817 low = i; 818 } 819 ailen[row] = nrow; 820 } 821 PetscFunctionReturn(0); 822 } 823 824 825 #undef __FUNC__ 826 #define __FUNC__ "MatAssemblyEnd_SeqBAIJ" 827 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 828 { 829 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 830 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 831 int m = a->m,*ip,N,*ailen = a->ilen; 832 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 833 MatScalar *aa = a->a,*ap; 834 835 PetscFunctionBegin; 836 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 837 838 if (m) rmax = ailen[0]; 839 for (i=1; i<mbs; i++) { 840 /* move each row back by the amount of empty slots (fshift) before it*/ 841 fshift += imax[i-1] - ailen[i-1]; 842 rmax = PetscMax(rmax,ailen[i]); 843 if (fshift) { 844 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 845 N = ailen[i]; 846 for (j=0; j<N; j++) { 847 ip[j-fshift] = ip[j]; 848 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 849 } 850 } 851 ai[i] = ai[i-1] + ailen[i-1]; 852 } 853 if (mbs) { 854 fshift += imax[mbs-1] - ailen[mbs-1]; 855 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 856 } 857 /* reset ilen and imax for each row */ 858 for (i=0; i<mbs; i++) { 859 ailen[i] = imax[i] = ai[i+1] - ai[i]; 860 } 861 a->nz = ai[mbs]; 862 863 /* diagonals may have moved, so kill the diagonal pointers */ 864 if (fshift && a->diag) { 865 ierr = PetscFree(a->diag);CHKERRQ(ierr); 866 PLogObjectMemory(A,-(m+1)*sizeof(int)); 867 a->diag = 0; 868 } 869 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 870 m,a->n,a->bs,fshift*bs2,a->nz*bs2); 871 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n", 872 a->reallocs); 873 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 874 a->reallocs = 0; 875 A->info.nz_unneeded = (PetscReal)fshift*bs2; 876 877 PetscFunctionReturn(0); 878 } 879 880 881 882 /* 883 This function returns an array of flags which indicate the locations of contiguous 884 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 885 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 886 Assume: sizes should be long enough to hold all the values. 887 */ 888 #undef __FUNC__ 889 #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Blocks" 890 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 891 { 892 int i,j,k,row; 893 PetscTruth flg; 894 895 PetscFunctionBegin; 896 for (i=0,j=0; i<n; j++) { 897 row = idx[i]; 898 if (row%bs!=0) { /* Not the begining of a block */ 899 sizes[j] = 1; 900 i++; 901 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 902 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 903 i++; 904 } else { /* Begining of the block, so check if the complete block exists */ 905 flg = PETSC_TRUE; 906 for (k=1; k<bs; k++) { 907 if (row+k != idx[i+k]) { /* break in the block */ 908 flg = PETSC_FALSE; 909 break; 910 } 911 } 912 if (flg == PETSC_TRUE) { /* No break in the bs */ 913 sizes[j] = bs; 914 i+= bs; 915 } else { 916 sizes[j] = 1; 917 i++; 918 } 919 } 920 } 921 *bs_max = j; 922 PetscFunctionReturn(0); 923 } 924 925 #undef __FUNC__ 926 #define __FUNC__ "MatZeroRows_SeqBAIJ" 927 int MatZeroRows_SeqBAIJ(Mat A,IS is,Scalar *diag) 928 { 929 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 930 int ierr,i,j,k,count,is_n,*is_idx,*rows; 931 int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max; 932 Scalar zero = 0.0; 933 MatScalar *aa; 934 935 PetscFunctionBegin; 936 /* Make a copy of the IS and sort it */ 937 ierr = ISGetSize(is,&is_n);CHKERRQ(ierr); 938 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 939 940 /* allocate memory for rows,sizes */ 941 rows = (int*)PetscMalloc((3*is_n+1)*sizeof(int));CHKPTRQ(rows); 942 sizes = rows + is_n; 943 944 /* initialize copy IS valurs to rows, and sort them */ 945 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 946 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 947 if (baij->keepzeroedrows) { 948 for (i=0; i<is_n; i++) { sizes[i] = 1; } 949 bs_max = is_n; 950 } else { 951 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 952 } 953 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 954 955 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 956 row = rows[j]; 957 if (row < 0 || row > baij->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"row %d out of range",row); 958 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 959 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 960 if (sizes[i] == bs && !baij->keepzeroedrows) { 961 if (diag) { 962 if (baij->ilen[row/bs] > 0) { 963 baij->ilen[row/bs] = 1; 964 baij->j[baij->i[row/bs]] = row/bs; 965 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 966 } 967 /* Now insert all the diagoanl values for this bs */ 968 for (k=0; k<bs; k++) { 969 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 970 } 971 } else { /* (!diag) */ 972 baij->ilen[row/bs] = 0; 973 } /* end (!diag) */ 974 } else { /* (sizes[i] != bs) */ 975 #if defined (PETSC_USE_DEBUG) 976 if (sizes[i] != 1) SETERRQ(1,0,"Internal Error. Value should be 1"); 977 #endif 978 for (k=0; k<count; k++) { 979 aa[0] = zero; 980 aa+=bs; 981 } 982 if (diag) { 983 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 984 } 985 } 986 } 987 988 ierr = PetscFree(rows);CHKERRQ(ierr); 989 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 990 PetscFunctionReturn(0); 991 } 992 993 #undef __FUNC__ 994 #define __FUNC__ "MatSetValues_SeqBAIJ" 995 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 996 { 997 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 998 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 999 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 1000 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 1001 int ridx,cidx,bs2=a->bs2,ierr; 1002 MatScalar *ap,value,*aa=a->a,*bap; 1003 1004 PetscFunctionBegin; 1005 for (k=0; k<m; k++) { /* loop over added rows */ 1006 row = im[k]; brow = row/bs; 1007 if (row < 0) continue; 1008 #if defined(PETSC_USE_BOPT_g) 1009 if (row >= a->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large: row %d max %d",row,a->m); 1010 #endif 1011 rp = aj + ai[brow]; 1012 ap = aa + bs2*ai[brow]; 1013 rmax = imax[brow]; 1014 nrow = ailen[brow]; 1015 low = 0; 1016 for (l=0; l<n; l++) { /* loop over added columns */ 1017 if (in[l] < 0) continue; 1018 #if defined(PETSC_USE_BOPT_g) 1019 if (in[l] >= a->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large: col %d max %d",in[l],a->n); 1020 #endif 1021 col = in[l]; bcol = col/bs; 1022 ridx = row % bs; cidx = col % bs; 1023 if (roworiented) { 1024 value = v[l + k*n]; 1025 } else { 1026 value = v[k + l*m]; 1027 } 1028 if (!sorted) low = 0; high = nrow; 1029 while (high-low > 7) { 1030 t = (low+high)/2; 1031 if (rp[t] > bcol) high = t; 1032 else low = t; 1033 } 1034 for (i=low; i<high; i++) { 1035 if (rp[i] > bcol) break; 1036 if (rp[i] == bcol) { 1037 bap = ap + bs2*i + bs*cidx + ridx; 1038 if (is == ADD_VALUES) *bap += value; 1039 else *bap = value; 1040 goto noinsert1; 1041 } 1042 } 1043 if (nonew == 1) goto noinsert1; 1044 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 1045 if (nrow >= rmax) { 1046 /* there is no extra room in row, therefore enlarge */ 1047 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1048 MatScalar *new_a; 1049 1050 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 1051 1052 /* Malloc new storage space */ 1053 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1054 new_a = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); 1055 new_j = (int*)(new_a + bs2*new_nz); 1056 new_i = new_j + new_nz; 1057 1058 /* copy over old data into new slots */ 1059 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 1060 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1061 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 1062 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1063 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 1064 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1065 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1066 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1067 /* free up old matrix storage */ 1068 ierr = PetscFree(a->a);CHKERRQ(ierr); 1069 if (!a->singlemalloc) { 1070 ierr = PetscFree(a->i);CHKERRQ(ierr); 1071 ierr = PetscFree(a->j);CHKERRQ(ierr); 1072 } 1073 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1074 a->singlemalloc = PETSC_TRUE; 1075 1076 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1077 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1078 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 1079 a->maxnz += bs2*CHUNKSIZE; 1080 a->reallocs++; 1081 a->nz++; 1082 } 1083 N = nrow++ - 1; 1084 /* shift up all the later entries in this row */ 1085 for (ii=N; ii>=i; ii--) { 1086 rp[ii+1] = rp[ii]; 1087 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1088 } 1089 if (N>=i) { 1090 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1091 } 1092 rp[i] = bcol; 1093 ap[bs2*i + bs*cidx + ridx] = value; 1094 noinsert1:; 1095 low = i; 1096 } 1097 ailen[brow] = nrow; 1098 } 1099 PetscFunctionReturn(0); 1100 } 1101 1102 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,PetscReal,Mat*); 1103 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,PetscReal); 1104 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int); 1105 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,int,MatReuse,Mat*); 1106 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**); 1107 extern int MatMultTranspose_SeqBAIJ(Mat,Vec,Vec); 1108 extern int MatMultTransposeAdd_SeqBAIJ(Mat,Vec,Vec,Vec); 1109 extern int MatScale_SeqBAIJ(Scalar*,Mat); 1110 extern int MatNorm_SeqBAIJ(Mat,NormType,PetscReal *); 1111 extern int MatEqual_SeqBAIJ(Mat,Mat,PetscTruth*); 1112 extern int MatGetDiagonal_SeqBAIJ(Mat,Vec); 1113 extern int MatDiagonalScale_SeqBAIJ(Mat,Vec,Vec); 1114 extern int MatGetInfo_SeqBAIJ(Mat,MatInfoType,MatInfo *); 1115 extern int MatZeroEntries_SeqBAIJ(Mat); 1116 1117 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec); 1118 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec); 1119 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec); 1120 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec); 1121 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec); 1122 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec); 1123 extern int MatSolve_SeqBAIJ_6(Mat,Vec,Vec); 1124 extern int MatSolve_SeqBAIJ_7(Mat,Vec,Vec); 1125 extern int MatSolveTranspose_SeqBAIJ_7(Mat,Vec,Vec); 1126 extern int MatSolveTranspose_SeqBAIJ_6(Mat,Vec,Vec); 1127 extern int MatSolveTranspose_SeqBAIJ_5(Mat,Vec,Vec); 1128 extern int MatSolveTranspose_SeqBAIJ_4(Mat,Vec,Vec); 1129 extern int MatSolveTranspose_SeqBAIJ_3(Mat,Vec,Vec); 1130 extern int MatSolveTranspose_SeqBAIJ_2(Mat,Vec,Vec); 1131 extern int MatSolveTranspose_SeqBAIJ_1(Mat,Vec,Vec); 1132 1133 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 1134 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 1135 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 1136 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 1137 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 1138 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 1139 extern int MatLUFactorNumeric_SeqBAIJ_6(Mat,Mat*); 1140 1141 extern int MatMult_SeqBAIJ_1(Mat,Vec,Vec); 1142 extern int MatMult_SeqBAIJ_2(Mat,Vec,Vec); 1143 extern int MatMult_SeqBAIJ_3(Mat,Vec,Vec); 1144 extern int MatMult_SeqBAIJ_4(Mat,Vec,Vec); 1145 extern int MatMult_SeqBAIJ_5(Mat,Vec,Vec); 1146 extern int MatMult_SeqBAIJ_6(Mat,Vec,Vec); 1147 extern int MatMult_SeqBAIJ_7(Mat,Vec,Vec); 1148 extern int MatMult_SeqBAIJ_N(Mat,Vec,Vec); 1149 1150 extern int MatMultAdd_SeqBAIJ_1(Mat,Vec,Vec,Vec); 1151 extern int MatMultAdd_SeqBAIJ_2(Mat,Vec,Vec,Vec); 1152 extern int MatMultAdd_SeqBAIJ_3(Mat,Vec,Vec,Vec); 1153 extern int MatMultAdd_SeqBAIJ_4(Mat,Vec,Vec,Vec); 1154 extern int MatMultAdd_SeqBAIJ_5(Mat,Vec,Vec,Vec); 1155 extern int MatMultAdd_SeqBAIJ_6(Mat,Vec,Vec,Vec); 1156 extern int MatMultAdd_SeqBAIJ_7(Mat,Vec,Vec,Vec); 1157 extern int MatMultAdd_SeqBAIJ_N(Mat,Vec,Vec,Vec); 1158 1159 #undef __FUNC__ 1160 #define __FUNC__ "MatILUFactor_SeqBAIJ" 1161 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info) 1162 { 1163 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1164 Mat outA; 1165 int ierr; 1166 PetscTruth row_identity,col_identity; 1167 1168 PetscFunctionBegin; 1169 if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,0,"Only levels = 0 supported for in-place ILU"); 1170 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1171 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1172 if (!row_identity || !col_identity) { 1173 SETERRQ(1,1,"Row and column permutations must be identity for in-place ILU"); 1174 } 1175 1176 outA = inA; 1177 inA->factor = FACTOR_LU; 1178 1179 if (!a->diag) { 1180 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1181 } 1182 /* 1183 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1184 for ILU(0) factorization with natural ordering 1185 */ 1186 switch (a->bs) { 1187 case 1: 1188 inA->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_2_NaturalOrdering; 1189 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering solvetrans BS=1\n"); 1190 case 2: 1191 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering; 1192 inA->ops->solve = MatSolve_SeqBAIJ_2_NaturalOrdering; 1193 inA->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_2_NaturalOrdering; 1194 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1195 break; 1196 case 3: 1197 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering; 1198 inA->ops->solve = MatSolve_SeqBAIJ_3_NaturalOrdering; 1199 inA->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_3_NaturalOrdering; 1200 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=3\n"); 1201 break; 1202 case 4: 1203 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering; 1204 inA->ops->solve = MatSolve_SeqBAIJ_4_NaturalOrdering; 1205 inA->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_4_NaturalOrdering; 1206 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1207 break; 1208 case 5: 1209 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering; 1210 inA->ops->solve = MatSolve_SeqBAIJ_5_NaturalOrdering; 1211 inA->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_5_NaturalOrdering; 1212 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1213 break; 1214 case 6: 1215 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering; 1216 inA->ops->solve = MatSolve_SeqBAIJ_6_NaturalOrdering; 1217 inA->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_6_NaturalOrdering; 1218 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1219 break; 1220 case 7: 1221 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering; 1222 inA->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_7_NaturalOrdering; 1223 inA->ops->solve = MatSolve_SeqBAIJ_7_NaturalOrdering; 1224 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1225 break; 1226 default: 1227 a->row = row; 1228 a->col = col; 1229 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1230 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1231 1232 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1233 ierr = ISInvertPermutation(col,&(a->icol));CHKERRQ(ierr); 1234 PLogObjectParent(inA,a->icol); 1235 1236 if (!a->solve_work) { 1237 a->solve_work = (Scalar*)PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 1238 PLogObjectMemory(inA,(a->m+a->bs)*sizeof(Scalar)); 1239 } 1240 } 1241 1242 ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr); 1243 1244 PetscFunctionReturn(0); 1245 } 1246 #undef __FUNC__ 1247 #define __FUNC__ "MatPrintHelp_SeqBAIJ" 1248 int MatPrintHelp_SeqBAIJ(Mat A) 1249 { 1250 static PetscTruth called = PETSC_FALSE; 1251 MPI_Comm comm = A->comm; 1252 int ierr; 1253 1254 PetscFunctionBegin; 1255 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1256 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1257 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1258 PetscFunctionReturn(0); 1259 } 1260 1261 EXTERN_C_BEGIN 1262 #undef __FUNC__ 1263 #define __FUNC__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1264 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices) 1265 { 1266 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1267 int i,nz,n; 1268 1269 PetscFunctionBegin; 1270 nz = baij->maxnz; 1271 n = baij->n; 1272 for (i=0; i<nz; i++) { 1273 baij->j[i] = indices[i]; 1274 } 1275 baij->nz = nz; 1276 for (i=0; i<n; i++) { 1277 baij->ilen[i] = baij->imax[i]; 1278 } 1279 1280 PetscFunctionReturn(0); 1281 } 1282 EXTERN_C_END 1283 1284 #undef __FUNC__ 1285 #define __FUNC__ "MatSeqBAIJSetColumnIndices" 1286 /*@ 1287 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1288 in the matrix. 1289 1290 Input Parameters: 1291 + mat - the SeqBAIJ matrix 1292 - indices - the column indices 1293 1294 Level: advanced 1295 1296 Notes: 1297 This can be called if you have precomputed the nonzero structure of the 1298 matrix and want to provide it to the matrix object to improve the performance 1299 of the MatSetValues() operation. 1300 1301 You MUST have set the correct numbers of nonzeros per row in the call to 1302 MatCreateSeqBAIJ(). 1303 1304 MUST be called before any calls to MatSetValues(); 1305 1306 @*/ 1307 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices) 1308 { 1309 int ierr,(*f)(Mat,int *); 1310 1311 PetscFunctionBegin; 1312 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1313 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void **)&f);CHKERRQ(ierr); 1314 if (f) { 1315 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1316 } else { 1317 SETERRQ(1,1,"Wrong type of matrix to set column indices"); 1318 } 1319 PetscFunctionReturn(0); 1320 } 1321 1322 /* -------------------------------------------------------------------*/ 1323 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 1324 MatGetRow_SeqBAIJ, 1325 MatRestoreRow_SeqBAIJ, 1326 MatMult_SeqBAIJ_N, 1327 MatMultAdd_SeqBAIJ_N, 1328 MatMultTranspose_SeqBAIJ, 1329 MatMultTransposeAdd_SeqBAIJ, 1330 MatSolve_SeqBAIJ_N, 1331 0, 1332 0, 1333 0, 1334 MatLUFactor_SeqBAIJ, 1335 0, 1336 0, 1337 MatTranspose_SeqBAIJ, 1338 MatGetInfo_SeqBAIJ, 1339 MatEqual_SeqBAIJ, 1340 MatGetDiagonal_SeqBAIJ, 1341 MatDiagonalScale_SeqBAIJ, 1342 MatNorm_SeqBAIJ, 1343 0, 1344 MatAssemblyEnd_SeqBAIJ, 1345 0, 1346 MatSetOption_SeqBAIJ, 1347 MatZeroEntries_SeqBAIJ, 1348 MatZeroRows_SeqBAIJ, 1349 MatLUFactorSymbolic_SeqBAIJ, 1350 MatLUFactorNumeric_SeqBAIJ_N, 1351 0, 1352 0, 1353 MatGetSize_SeqBAIJ, 1354 MatGetSize_SeqBAIJ, 1355 MatGetOwnershipRange_SeqBAIJ, 1356 MatILUFactorSymbolic_SeqBAIJ, 1357 0, 1358 0, 1359 0, 1360 MatDuplicate_SeqBAIJ, 1361 0, 1362 0, 1363 MatILUFactor_SeqBAIJ, 1364 0, 1365 0, 1366 MatGetSubMatrices_SeqBAIJ, 1367 MatIncreaseOverlap_SeqBAIJ, 1368 MatGetValues_SeqBAIJ, 1369 0, 1370 MatPrintHelp_SeqBAIJ, 1371 MatScale_SeqBAIJ, 1372 0, 1373 0, 1374 0, 1375 MatGetBlockSize_SeqBAIJ, 1376 MatGetRowIJ_SeqBAIJ, 1377 MatRestoreRowIJ_SeqBAIJ, 1378 0, 1379 0, 1380 0, 1381 0, 1382 0, 1383 0, 1384 MatSetValuesBlocked_SeqBAIJ, 1385 MatGetSubMatrix_SeqBAIJ, 1386 0, 1387 0, 1388 MatGetMaps_Petsc}; 1389 1390 EXTERN_C_BEGIN 1391 #undef __FUNC__ 1392 #define __FUNC__ "MatStoreValues_SeqBAIJ" 1393 int MatStoreValues_SeqBAIJ(Mat mat) 1394 { 1395 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1396 int nz = aij->i[aij->m]*aij->bs*aij->bs2; 1397 int ierr; 1398 1399 PetscFunctionBegin; 1400 if (aij->nonew != 1) { 1401 SETERRQ(1,1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1402 } 1403 1404 /* allocate space for values if not already there */ 1405 if (!aij->saved_values) { 1406 aij->saved_values = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(aij->saved_values); 1407 } 1408 1409 /* copy values over */ 1410 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(Scalar));CHKERRQ(ierr); 1411 PetscFunctionReturn(0); 1412 } 1413 EXTERN_C_END 1414 1415 EXTERN_C_BEGIN 1416 #undef __FUNC__ 1417 #define __FUNC__ "MatRetrieveValues_SeqBAIJ" 1418 int MatRetrieveValues_SeqBAIJ(Mat mat) 1419 { 1420 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1421 int nz = aij->i[aij->m]*aij->bs*aij->bs2,ierr; 1422 1423 PetscFunctionBegin; 1424 if (aij->nonew != 1) { 1425 SETERRQ(1,1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1426 } 1427 if (!aij->saved_values) { 1428 SETERRQ(1,1,"Must call MatStoreValues(A);first"); 1429 } 1430 1431 /* copy values over */ 1432 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(Scalar));CHKERRQ(ierr); 1433 PetscFunctionReturn(0); 1434 } 1435 EXTERN_C_END 1436 1437 #undef __FUNC__ 1438 #define __FUNC__ "MatCreateSeqBAIJ" 1439 /*@C 1440 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1441 compressed row) format. For good matrix assembly performance the 1442 user should preallocate the matrix storage by setting the parameter nz 1443 (or the array nnz). By setting these parameters accurately, performance 1444 during matrix assembly can be increased by more than a factor of 50. 1445 1446 Collective on MPI_Comm 1447 1448 Input Parameters: 1449 + comm - MPI communicator, set to PETSC_COMM_SELF 1450 . bs - size of block 1451 . m - number of rows 1452 . n - number of columns 1453 . nz - number of block nonzeros per block row (same for all rows) 1454 - nnz - array containing the number of block nonzeros in the various block rows 1455 (possibly different for each block row) or PETSC_NULL 1456 1457 Output Parameter: 1458 . A - the matrix 1459 1460 Options Database Keys: 1461 . -mat_no_unroll - uses code that does not unroll the loops in the 1462 block calculations (much slower) 1463 . -mat_block_size - size of the blocks to use 1464 1465 Level: intermediate 1466 1467 Notes: 1468 The block AIJ format is fully compatible with standard Fortran 77 1469 storage. That is, the stored row and column indices can begin at 1470 either one (as in Fortran) or zero. See the users' manual for details. 1471 1472 Specify the preallocated storage with either nz or nnz (not both). 1473 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1474 allocation. For additional details, see the users manual chapter on 1475 matrices. 1476 1477 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1478 @*/ 1479 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A) 1480 { 1481 Mat B; 1482 Mat_SeqBAIJ *b; 1483 int i,len,ierr,mbs,nbs,bs2,size; 1484 PetscTruth flg; 1485 1486 PetscFunctionBegin; 1487 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1488 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"Comm must be of size 1"); 1489 1490 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1491 mbs = m/bs; 1492 nbs = n/bs; 1493 bs2 = bs*bs; 1494 1495 if (mbs*bs!=m || nbs*bs!=n) { 1496 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Number rows, cols must be divisible by blocksize"); 1497 } 1498 1499 if (nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"nz cannot be less than -2: value %d",nz); 1500 if (nnz) { 1501 for (i=0; i<mbs; i++) { 1502 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1503 } 1504 } 1505 1506 *A = 0; 1507 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATSEQBAIJ,"Mat",comm,MatDestroy,MatView); 1508 PLogObjectCreate(B); 1509 B->data = (void*)(b = PetscNew(Mat_SeqBAIJ));CHKPTRQ(b); 1510 ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 1511 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1512 ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1513 if (!flg) { 1514 switch (bs) { 1515 case 1: 1516 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1517 B->ops->solve = MatSolve_SeqBAIJ_1; 1518 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_1; 1519 B->ops->mult = MatMult_SeqBAIJ_1; 1520 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 1521 break; 1522 case 2: 1523 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 1524 B->ops->solve = MatSolve_SeqBAIJ_2; 1525 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_2; 1526 B->ops->mult = MatMult_SeqBAIJ_2; 1527 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 1528 break; 1529 case 3: 1530 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 1531 B->ops->solve = MatSolve_SeqBAIJ_3; 1532 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_3; 1533 B->ops->mult = MatMult_SeqBAIJ_3; 1534 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 1535 break; 1536 case 4: 1537 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 1538 B->ops->solve = MatSolve_SeqBAIJ_4; 1539 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_4; 1540 B->ops->mult = MatMult_SeqBAIJ_4; 1541 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 1542 break; 1543 case 5: 1544 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 1545 B->ops->solve = MatSolve_SeqBAIJ_5; 1546 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_5; 1547 B->ops->mult = MatMult_SeqBAIJ_5; 1548 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 1549 break; 1550 case 6: 1551 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 1552 B->ops->solve = MatSolve_SeqBAIJ_6; 1553 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_6; 1554 B->ops->mult = MatMult_SeqBAIJ_6; 1555 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 1556 break; 1557 case 7: 1558 B->ops->mult = MatMult_SeqBAIJ_7; 1559 B->ops->solve = MatSolve_SeqBAIJ_7; 1560 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_7; 1561 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 1562 break; 1563 } 1564 } 1565 B->ops->destroy = MatDestroy_SeqBAIJ; 1566 B->ops->view = MatView_SeqBAIJ; 1567 B->factor = 0; 1568 B->lupivotthreshold = 1.0; 1569 B->mapping = 0; 1570 b->row = 0; 1571 b->col = 0; 1572 b->icol = 0; 1573 b->reallocs = 0; 1574 b->saved_values = 0; 1575 1576 b->m = m; B->m = m; B->M = m; 1577 b->n = n; B->n = n; B->N = n; 1578 1579 ierr = MapCreateMPI(comm,m,m,&B->rmap);CHKERRQ(ierr); 1580 ierr = MapCreateMPI(comm,n,n,&B->cmap);CHKERRQ(ierr); 1581 1582 b->mbs = mbs; 1583 b->nbs = nbs; 1584 b->imax = (int*)PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(b->imax); 1585 if (!nnz) { 1586 if (nz == PETSC_DEFAULT) nz = 5; 1587 else if (nz <= 0) nz = 1; 1588 for (i=0; i<mbs; i++) b->imax[i] = nz; 1589 nz = nz*mbs; 1590 } else { 1591 nz = 0; 1592 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1593 } 1594 1595 /* allocate the matrix space */ 1596 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (b->m+1)*sizeof(int); 1597 b->a = (MatScalar*)PetscMalloc(len);CHKPTRQ(b->a); 1598 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1599 b->j = (int*)(b->a + nz*bs2); 1600 ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 1601 b->i = b->j + nz; 1602 b->singlemalloc = PETSC_TRUE; 1603 1604 b->i[0] = 0; 1605 for (i=1; i<mbs+1; i++) { 1606 b->i[i] = b->i[i-1] + b->imax[i-1]; 1607 } 1608 1609 /* b->ilen will count nonzeros in each block row so far. */ 1610 b->ilen = (int*)PetscMalloc((mbs+1)*sizeof(int)); 1611 PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1612 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1613 1614 b->bs = bs; 1615 b->bs2 = bs2; 1616 b->mbs = mbs; 1617 b->nz = 0; 1618 b->maxnz = nz*bs2; 1619 b->sorted = PETSC_FALSE; 1620 b->roworiented = PETSC_TRUE; 1621 b->nonew = 0; 1622 b->diag = 0; 1623 b->solve_work = 0; 1624 b->mult_work = 0; 1625 b->spptr = 0; 1626 B->info.nz_unneeded = (PetscReal)b->maxnz; 1627 b->keepzeroedrows = PETSC_FALSE; 1628 1629 *A = B; 1630 ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr); 1631 if (flg) {ierr = MatPrintHelp(B);CHKERRQ(ierr); } 1632 1633 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1634 "MatStoreValues_SeqBAIJ", 1635 (void*)MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 1636 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1637 "MatRetrieveValues_SeqBAIJ", 1638 (void*)MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 1639 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 1640 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 1641 (void*)MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 1642 PetscFunctionReturn(0); 1643 } 1644 1645 #undef __FUNC__ 1646 #define __FUNC__ "MatDuplicate_SeqBAIJ" 1647 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1648 { 1649 Mat C; 1650 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 1651 int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr; 1652 1653 PetscFunctionBegin; 1654 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,0,"Corrupt matrix"); 1655 1656 *B = 0; 1657 PetscHeaderCreate(C,_p_Mat,struct _MatOps,MAT_COOKIE,MATSEQBAIJ,"Mat",A->comm,MatDestroy,MatView); 1658 PLogObjectCreate(C); 1659 C->data = (void*)(c = PetscNew(Mat_SeqBAIJ));CHKPTRQ(c); 1660 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1661 C->ops->destroy = MatDestroy_SeqBAIJ; 1662 C->ops->view = MatView_SeqBAIJ; 1663 C->factor = A->factor; 1664 c->row = 0; 1665 c->col = 0; 1666 c->icol = 0; 1667 c->saved_values = 0; 1668 c->keepzeroedrows = a->keepzeroedrows; 1669 C->assembled = PETSC_TRUE; 1670 1671 c->m = C->m = a->m; 1672 c->n = C->n = a->n; 1673 C->M = a->m; 1674 C->N = a->n; 1675 1676 c->bs = a->bs; 1677 c->bs2 = a->bs2; 1678 c->mbs = a->mbs; 1679 c->nbs = a->nbs; 1680 1681 c->imax = (int*)PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->imax); 1682 c->ilen = (int*)PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->ilen); 1683 for (i=0; i<mbs; i++) { 1684 c->imax[i] = a->imax[i]; 1685 c->ilen[i] = a->ilen[i]; 1686 } 1687 1688 /* allocate the matrix space */ 1689 c->singlemalloc = PETSC_TRUE; 1690 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1691 c->a = (MatScalar*)PetscMalloc(len);CHKPTRQ(c->a); 1692 c->j = (int*)(c->a + nz*bs2); 1693 c->i = c->j + nz; 1694 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1695 if (mbs > 0) { 1696 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1697 if (cpvalues == MAT_COPY_VALUES) { 1698 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1699 } else { 1700 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1701 } 1702 } 1703 1704 PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1705 c->sorted = a->sorted; 1706 c->roworiented = a->roworiented; 1707 c->nonew = a->nonew; 1708 1709 if (a->diag) { 1710 c->diag = (int*)PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->diag); 1711 PLogObjectMemory(C,(mbs+1)*sizeof(int)); 1712 for (i=0; i<mbs; i++) { 1713 c->diag[i] = a->diag[i]; 1714 } 1715 } else c->diag = 0; 1716 c->nz = a->nz; 1717 c->maxnz = a->maxnz; 1718 c->solve_work = 0; 1719 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1720 c->mult_work = 0; 1721 *B = C; 1722 ierr = FListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1723 PetscFunctionReturn(0); 1724 } 1725 1726 #undef __FUNC__ 1727 #define __FUNC__ "MatLoad_SeqBAIJ" 1728 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 1729 { 1730 Mat_SeqBAIJ *a; 1731 Mat B; 1732 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1733 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1734 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1735 int *masked,nmask,tmp,bs2,ishift; 1736 Scalar *aa; 1737 MPI_Comm comm = ((PetscObject)viewer)->comm; 1738 1739 PetscFunctionBegin; 1740 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1741 bs2 = bs*bs; 1742 1743 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1744 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"view must have one processor"); 1745 ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1746 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1747 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not Mat object"); 1748 M = header[1]; N = header[2]; nz = header[3]; 1749 1750 if (header[3] < 0) { 1751 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as SeqBAIJ"); 1752 } 1753 1754 if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices"); 1755 1756 /* 1757 This code adds extra rows to make sure the number of rows is 1758 divisible by the blocksize 1759 */ 1760 mbs = M/bs; 1761 extra_rows = bs - M + bs*(mbs); 1762 if (extra_rows == bs) extra_rows = 0; 1763 else mbs++; 1764 if (extra_rows) { 1765 PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 1766 } 1767 1768 /* read in row lengths */ 1769 rowlengths = (int*)PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 1770 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1771 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1772 1773 /* read in column indices */ 1774 jj = (int*)PetscMalloc((nz+extra_rows)*sizeof(int));CHKPTRQ(jj); 1775 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1776 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1777 1778 /* loop over row lengths determining block row lengths */ 1779 browlengths = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 1780 ierr = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1781 mask = (int*)PetscMalloc(2*mbs*sizeof(int));CHKPTRQ(mask); 1782 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1783 masked = mask + mbs; 1784 rowcount = 0; nzcount = 0; 1785 for (i=0; i<mbs; i++) { 1786 nmask = 0; 1787 for (j=0; j<bs; j++) { 1788 kmax = rowlengths[rowcount]; 1789 for (k=0; k<kmax; k++) { 1790 tmp = jj[nzcount++]/bs; 1791 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1792 } 1793 rowcount++; 1794 } 1795 browlengths[i] += nmask; 1796 /* zero out the mask elements we set */ 1797 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1798 } 1799 1800 /* create our matrix */ 1801 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr); 1802 B = *A; 1803 a = (Mat_SeqBAIJ*)B->data; 1804 1805 /* set matrix "i" values */ 1806 a->i[0] = 0; 1807 for (i=1; i<= mbs; i++) { 1808 a->i[i] = a->i[i-1] + browlengths[i-1]; 1809 a->ilen[i-1] = browlengths[i-1]; 1810 } 1811 a->nz = 0; 1812 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 1813 1814 /* read in nonzero values */ 1815 aa = (Scalar*)PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 1816 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1817 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1818 1819 /* set "a" and "j" values into matrix */ 1820 nzcount = 0; jcount = 0; 1821 for (i=0; i<mbs; i++) { 1822 nzcountb = nzcount; 1823 nmask = 0; 1824 for (j=0; j<bs; j++) { 1825 kmax = rowlengths[i*bs+j]; 1826 for (k=0; k<kmax; k++) { 1827 tmp = jj[nzcount++]/bs; 1828 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1829 } 1830 rowcount++; 1831 } 1832 /* sort the masked values */ 1833 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1834 1835 /* set "j" values into matrix */ 1836 maskcount = 1; 1837 for (j=0; j<nmask; j++) { 1838 a->j[jcount++] = masked[j]; 1839 mask[masked[j]] = maskcount++; 1840 } 1841 /* set "a" values into matrix */ 1842 ishift = bs2*a->i[i]; 1843 for (j=0; j<bs; j++) { 1844 kmax = rowlengths[i*bs+j]; 1845 for (k=0; k<kmax; k++) { 1846 tmp = jj[nzcountb]/bs ; 1847 block = mask[tmp] - 1; 1848 point = jj[nzcountb] - bs*tmp; 1849 idx = ishift + bs2*block + j + bs*point; 1850 a->a[idx] = aa[nzcountb++]; 1851 } 1852 } 1853 /* zero out the mask elements we set */ 1854 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1855 } 1856 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"Bad binary matrix"); 1857 1858 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1859 ierr = PetscFree(browlengths);CHKERRQ(ierr); 1860 ierr = PetscFree(aa);CHKERRQ(ierr); 1861 ierr = PetscFree(jj);CHKERRQ(ierr); 1862 ierr = PetscFree(mask);CHKERRQ(ierr); 1863 1864 B->assembled = PETSC_TRUE; 1865 1866 ierr = MatView_Private(B);CHKERRQ(ierr); 1867 PetscFunctionReturn(0); 1868 } 1869 1870 1871 1872