1 /*$Id: sbaij.c,v 1.62 2001/08/07 03:03:01 balay Exp $*/ 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 8 #include "src/vec/vecimpl.h" 9 #include "src/inline/spops.h" 10 #include "src/mat/impls/sbaij/seq/sbaij.h" 11 #if defined(PETSC_HAVE_SPOOLES) 12 EXTERN int MatUseSpooles_SeqSBAIJ(Mat); 13 #endif 14 15 #define CHUNKSIZE 10 16 17 /* 18 Checks for missing diagonals 19 */ 20 #undef __FUNCT__ 21 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 22 int MatMissingDiagonal_SeqSBAIJ(Mat A) 23 { 24 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 25 int *diag,*jj = a->j,i,ierr; 26 27 PetscFunctionBegin; 28 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 29 diag = a->diag; 30 for (i=0; i<a->mbs; i++) { 31 if (jj[diag[i]] != i) SETERRQ1(1,"Matrix is missing diagonal number %d",i); 32 } 33 PetscFunctionReturn(0); 34 } 35 36 #undef __FUNCT__ 37 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 38 int MatMarkDiagonal_SeqSBAIJ(Mat A) 39 { 40 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 41 int i,mbs = a->mbs,ierr; 42 43 PetscFunctionBegin; 44 if (a->diag) PetscFunctionReturn(0); 45 46 ierr = PetscMalloc((mbs+1)*sizeof(int),&a->diag);CHKERRQ(ierr); 47 PetscLogObjectMemory(A,(mbs+1)*sizeof(int)); 48 for (i=0; i<mbs; i++) a->diag[i] = a->i[i]; 49 PetscFunctionReturn(0); 50 } 51 52 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 53 54 #undef __FUNCT__ 55 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 56 static int MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 57 { 58 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 59 60 PetscFunctionBegin; 61 if (ia) { 62 SETERRQ(1,"Function not yet written for SBAIJ format, only supports natural ordering"); 63 } 64 *nn = a->mbs; 65 PetscFunctionReturn(0); 66 } 67 68 #undef __FUNCT__ 69 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 70 static int MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 71 { 72 PetscFunctionBegin; 73 if (!ia) PetscFunctionReturn(0); 74 SETERRQ(1,"Function not yet written for SBAIJ format, only supports natural ordering"); 75 /* PetscFunctionReturn(0); */ 76 } 77 78 #undef __FUNCT__ 79 #define __FUNCT__ "MatGetBlockSize_SeqSBAIJ" 80 int MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs) 81 { 82 Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data; 83 84 PetscFunctionBegin; 85 *bs = sbaij->bs; 86 PetscFunctionReturn(0); 87 } 88 89 #undef __FUNCT__ 90 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 91 int MatDestroy_SeqSBAIJ(Mat A) 92 { 93 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 94 int ierr; 95 96 PetscFunctionBegin; 97 #if defined(PETSC_USE_LOG) 98 PetscLogObjectState((PetscObject)A,"Rows=%d, s_NZ=%d",A->m,a->s_nz); 99 #endif 100 ierr = PetscFree(a->a);CHKERRQ(ierr); 101 if (!a->singlemalloc) { 102 ierr = PetscFree(a->i);CHKERRQ(ierr); 103 ierr = PetscFree(a->j);CHKERRQ(ierr); 104 } 105 if (a->row) { 106 ierr = ISDestroy(a->row);CHKERRQ(ierr); 107 } 108 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 109 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 110 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 111 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 112 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 113 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 114 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 115 116 if (a->inew){ 117 ierr = PetscFree(a->inew);CHKERRQ(ierr); 118 a->inew = 0; 119 } 120 ierr = PetscFree(a);CHKERRQ(ierr); 121 PetscFunctionReturn(0); 122 } 123 124 #undef __FUNCT__ 125 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 126 int MatSetOption_SeqSBAIJ(Mat A,MatOption op) 127 { 128 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 129 130 PetscFunctionBegin; 131 switch (op) { 132 case MAT_ROW_ORIENTED: 133 a->roworiented = PETSC_TRUE; 134 break; 135 case MAT_COLUMN_ORIENTED: 136 a->roworiented = PETSC_FALSE; 137 break; 138 case MAT_COLUMNS_SORTED: 139 a->sorted = PETSC_TRUE; 140 break; 141 case MAT_COLUMNS_UNSORTED: 142 a->sorted = PETSC_FALSE; 143 break; 144 case MAT_KEEP_ZEROED_ROWS: 145 a->keepzeroedrows = PETSC_TRUE; 146 break; 147 case MAT_NO_NEW_NONZERO_LOCATIONS: 148 a->nonew = 1; 149 break; 150 case MAT_NEW_NONZERO_LOCATION_ERR: 151 a->nonew = -1; 152 break; 153 case MAT_NEW_NONZERO_ALLOCATION_ERR: 154 a->nonew = -2; 155 break; 156 case MAT_YES_NEW_NONZERO_LOCATIONS: 157 a->nonew = 0; 158 break; 159 case MAT_ROWS_SORTED: 160 case MAT_ROWS_UNSORTED: 161 case MAT_YES_NEW_DIAGONALS: 162 case MAT_IGNORE_OFF_PROC_ENTRIES: 163 case MAT_USE_HASH_TABLE: 164 case MAT_USE_SINGLE_PRECISION_SOLVES: 165 PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n"); 166 break; 167 case MAT_NO_NEW_DIAGONALS: 168 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 169 default: 170 SETERRQ(PETSC_ERR_SUP,"unknown option"); 171 } 172 PetscFunctionReturn(0); 173 } 174 175 #undef __FUNCT__ 176 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 177 int MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,PetscScalar **v) 178 { 179 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 180 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2,ierr; 181 MatScalar *aa,*aa_i; 182 PetscScalar *v_i; 183 184 PetscFunctionBegin; 185 bs = a->bs; 186 ai = a->i; 187 aj = a->j; 188 aa = a->a; 189 bs2 = a->bs2; 190 191 if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range"); 192 193 bn = row/bs; /* Block number */ 194 bp = row % bs; /* Block position */ 195 M = ai[bn+1] - ai[bn]; 196 *ncols = bs*M; 197 198 if (v) { 199 *v = 0; 200 if (*ncols) { 201 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 202 for (i=0; i<M; i++) { /* for each block in the block row */ 203 v_i = *v + i*bs; 204 aa_i = aa + bs2*(ai[bn] + i); 205 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 206 } 207 } 208 } 209 210 if (cols) { 211 *cols = 0; 212 if (*ncols) { 213 ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr); 214 for (i=0; i<M; i++) { /* for each block in the block row */ 215 cols_i = *cols + i*bs; 216 itmp = bs*aj[ai[bn] + i]; 217 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 218 } 219 } 220 } 221 222 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 223 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 224 #ifdef column_search 225 v_i = *v + M*bs; 226 cols_i = *cols + M*bs; 227 for (i=0; i<bn; i++){ /* for each block row */ 228 M = ai[i+1] - ai[i]; 229 for (j=0; j<M; j++){ 230 itmp = aj[ai[i] + j]; /* block column value */ 231 if (itmp == bn){ 232 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 233 for (k=0; k<bs; k++) { 234 *cols_i++ = i*bs+k; 235 *v_i++ = aa_i[k]; 236 } 237 *ncols += bs; 238 break; 239 } 240 } 241 } 242 #endif 243 244 PetscFunctionReturn(0); 245 } 246 247 #undef __FUNCT__ 248 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 249 int MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 250 { 251 int ierr; 252 253 PetscFunctionBegin; 254 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 255 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 256 PetscFunctionReturn(0); 257 } 258 259 #undef __FUNCT__ 260 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 261 int MatTranspose_SeqSBAIJ(Mat A,Mat *B) 262 { 263 int ierr; 264 PetscFunctionBegin; 265 ierr = MatDuplicate(A,MAT_COPY_VALUES,B); 266 PetscFunctionReturn(0); 267 } 268 269 #undef __FUNCT__ 270 #define __FUNCT__ "MatView_SeqSBAIJ_Binary" 271 static int MatView_SeqSBAIJ_Binary(Mat A,PetscViewer viewer) 272 { 273 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 274 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 275 PetscScalar *aa; 276 FILE *file; 277 278 PetscFunctionBegin; 279 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 280 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 281 col_lens[0] = MAT_FILE_COOKIE; 282 283 col_lens[1] = A->m; 284 col_lens[2] = A->m; 285 col_lens[3] = a->s_nz*bs2; 286 287 /* store lengths of each row and write (including header) to file */ 288 count = 0; 289 for (i=0; i<a->mbs; i++) { 290 for (j=0; j<bs; j++) { 291 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 292 } 293 } 294 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 295 ierr = PetscFree(col_lens);CHKERRQ(ierr); 296 297 /* store column indices (zero start index) */ 298 ierr = PetscMalloc((a->s_nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 299 count = 0; 300 for (i=0; i<a->mbs; i++) { 301 for (j=0; j<bs; j++) { 302 for (k=a->i[i]; k<a->i[i+1]; k++) { 303 for (l=0; l<bs; l++) { 304 jj[count++] = bs*a->j[k] + l; 305 } 306 } 307 } 308 } 309 ierr = PetscBinaryWrite(fd,jj,bs2*a->s_nz,PETSC_INT,0);CHKERRQ(ierr); 310 ierr = PetscFree(jj);CHKERRQ(ierr); 311 312 /* store nonzero values */ 313 ierr = PetscMalloc((a->s_nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 314 count = 0; 315 for (i=0; i<a->mbs; i++) { 316 for (j=0; j<bs; j++) { 317 for (k=a->i[i]; k<a->i[i+1]; k++) { 318 for (l=0; l<bs; l++) { 319 aa[count++] = a->a[bs2*k + l*bs + j]; 320 } 321 } 322 } 323 } 324 ierr = PetscBinaryWrite(fd,aa,bs2*a->s_nz,PETSC_SCALAR,0);CHKERRQ(ierr); 325 ierr = PetscFree(aa);CHKERRQ(ierr); 326 327 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 328 if (file) { 329 fprintf(file,"-matload_block_size %d\n",a->bs); 330 } 331 PetscFunctionReturn(0); 332 } 333 334 #undef __FUNCT__ 335 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 336 static int MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 337 { 338 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 339 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 340 char *name; 341 PetscViewerFormat format; 342 343 PetscFunctionBegin; 344 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 345 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 346 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_LONG) { 347 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 348 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 349 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 350 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 351 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 352 for (i=0; i<a->mbs; i++) { 353 for (j=0; j<bs; j++) { 354 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 355 for (k=a->i[i]; k<a->i[i+1]; k++) { 356 for (l=0; l<bs; l++) { 357 #if defined(PETSC_USE_COMPLEX) 358 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 359 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 360 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 361 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 362 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 363 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 364 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 365 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 366 } 367 #else 368 if (a->a[bs2*k + l*bs + j] != 0.0) { 369 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 370 } 371 #endif 372 } 373 } 374 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 375 } 376 } 377 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 378 } else { 379 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 380 for (i=0; i<a->mbs; i++) { 381 for (j=0; j<bs; j++) { 382 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 383 for (k=a->i[i]; k<a->i[i+1]; k++) { 384 for (l=0; l<bs; l++) { 385 #if defined(PETSC_USE_COMPLEX) 386 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 387 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 388 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 389 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 390 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 391 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 392 } else { 393 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 394 } 395 #else 396 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 397 #endif 398 } 399 } 400 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 401 } 402 } 403 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 404 } 405 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 406 PetscFunctionReturn(0); 407 } 408 409 #undef __FUNCT__ 410 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 411 static int MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 412 { 413 Mat A = (Mat) Aa; 414 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 415 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank; 416 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 417 MatScalar *aa; 418 MPI_Comm comm; 419 PetscViewer viewer; 420 421 PetscFunctionBegin; 422 /* 423 This is nasty. If this is called from an originally parallel matrix 424 then all processes call this,but only the first has the matrix so the 425 rest should return immediately. 426 */ 427 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 428 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 429 if (rank) PetscFunctionReturn(0); 430 431 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 432 433 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 434 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 435 436 /* loop over matrix elements drawing boxes */ 437 color = PETSC_DRAW_BLUE; 438 for (i=0,row=0; i<mbs; i++,row+=bs) { 439 for (j=a->i[i]; j<a->i[i+1]; j++) { 440 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 441 x_l = a->j[j]*bs; x_r = x_l + 1.0; 442 aa = a->a + j*bs2; 443 for (k=0; k<bs; k++) { 444 for (l=0; l<bs; l++) { 445 if (PetscRealPart(*aa++) >= 0.) continue; 446 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 447 } 448 } 449 } 450 } 451 color = PETSC_DRAW_CYAN; 452 for (i=0,row=0; i<mbs; i++,row+=bs) { 453 for (j=a->i[i]; j<a->i[i+1]; j++) { 454 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 455 x_l = a->j[j]*bs; x_r = x_l + 1.0; 456 aa = a->a + j*bs2; 457 for (k=0; k<bs; k++) { 458 for (l=0; l<bs; l++) { 459 if (PetscRealPart(*aa++) != 0.) continue; 460 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 461 } 462 } 463 } 464 } 465 466 color = PETSC_DRAW_RED; 467 for (i=0,row=0; i<mbs; i++,row+=bs) { 468 for (j=a->i[i]; j<a->i[i+1]; j++) { 469 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 470 x_l = a->j[j]*bs; x_r = x_l + 1.0; 471 aa = a->a + j*bs2; 472 for (k=0; k<bs; k++) { 473 for (l=0; l<bs; l++) { 474 if (PetscRealPart(*aa++) <= 0.) continue; 475 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 476 } 477 } 478 } 479 } 480 PetscFunctionReturn(0); 481 } 482 483 #undef __FUNCT__ 484 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 485 static int MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 486 { 487 int ierr; 488 PetscReal xl,yl,xr,yr,w,h; 489 PetscDraw draw; 490 PetscTruth isnull; 491 492 PetscFunctionBegin; 493 494 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 495 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 496 497 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 498 xr = A->m; yr = A->m; h = yr/10.0; w = xr/10.0; 499 xr += w; yr += h; xl = -w; yl = -h; 500 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 501 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 502 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 503 PetscFunctionReturn(0); 504 } 505 506 #undef __FUNCT__ 507 #define __FUNCT__ "MatView_SeqSBAIJ" 508 int MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 509 { 510 int ierr; 511 PetscTruth issocket,isascii,isbinary,isdraw; 512 513 PetscFunctionBegin; 514 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 515 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 516 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 517 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 518 if (issocket) { 519 SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 520 } else if (isascii){ 521 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 522 } else if (isbinary) { 523 ierr = MatView_SeqSBAIJ_Binary(A,viewer);CHKERRQ(ierr); 524 } else if (isdraw) { 525 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 526 } else { 527 SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name); 528 } 529 PetscFunctionReturn(0); 530 } 531 532 533 #undef __FUNCT__ 534 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 535 int MatGetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v) 536 { 537 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 538 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 539 int *ai = a->i,*ailen = a->ilen; 540 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 541 MatScalar *ap,*aa = a->a,zero = 0.0; 542 543 PetscFunctionBegin; 544 for (k=0; k<m; k++) { /* loop over rows */ 545 row = im[k]; brow = row/bs; 546 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 547 if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 548 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 549 nrow = ailen[brow]; 550 for (l=0; l<n; l++) { /* loop over columns */ 551 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 552 if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 553 col = in[l] ; 554 bcol = col/bs; 555 cidx = col%bs; 556 ridx = row%bs; 557 high = nrow; 558 low = 0; /* assume unsorted */ 559 while (high-low > 5) { 560 t = (low+high)/2; 561 if (rp[t] > bcol) high = t; 562 else low = t; 563 } 564 for (i=low; i<high; i++) { 565 if (rp[i] > bcol) break; 566 if (rp[i] == bcol) { 567 *v++ = ap[bs2*i+bs*cidx+ridx]; 568 goto finished; 569 } 570 } 571 *v++ = zero; 572 finished:; 573 } 574 } 575 PetscFunctionReturn(0); 576 } 577 578 579 #undef __FUNCT__ 580 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 581 int MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is) 582 { 583 PetscFunctionBegin; 584 SETERRQ(1,"Function not yet written for SBAIJ format"); 585 } 586 587 #undef __FUNCT__ 588 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 589 int MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 590 { 591 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 592 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 593 int m = A->m,*ip,N,*ailen = a->ilen; 594 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 595 MatScalar *aa = a->a,*ap; 596 #if defined(PETSC_HAVE_SPOOLES) 597 PetscTruth flag; 598 #endif 599 600 PetscFunctionBegin; 601 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 602 603 if (m) rmax = ailen[0]; 604 for (i=1; i<mbs; i++) { 605 /* move each row back by the amount of empty slots (fshift) before it*/ 606 fshift += imax[i-1] - ailen[i-1]; 607 rmax = PetscMax(rmax,ailen[i]); 608 if (fshift) { 609 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 610 N = ailen[i]; 611 for (j=0; j<N; j++) { 612 ip[j-fshift] = ip[j]; 613 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 614 } 615 } 616 ai[i] = ai[i-1] + ailen[i-1]; 617 } 618 if (mbs) { 619 fshift += imax[mbs-1] - ailen[mbs-1]; 620 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 621 } 622 /* reset ilen and imax for each row */ 623 for (i=0; i<mbs; i++) { 624 ailen[i] = imax[i] = ai[i+1] - ai[i]; 625 } 626 a->s_nz = ai[mbs]; 627 628 /* diagonals may have moved, reset it */ 629 if (a->diag) { 630 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(int));CHKERRQ(ierr); 631 } 632 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 633 m,A->m,a->bs,fshift*bs2,a->s_nz*bs2); 634 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n", 635 a->reallocs); 636 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 637 a->reallocs = 0; 638 A->info.nz_unneeded = (PetscReal)fshift*bs2; 639 640 #if defined(PETSC_HAVE_SPOOLES) 641 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_sbaij_spooles",&flag);CHKERRQ(ierr); 642 if (flag) { ierr = MatUseSpooles_SeqSBAIJ(A);CHKERRQ(ierr); } 643 #endif 644 645 PetscFunctionReturn(0); 646 } 647 648 /* 649 This function returns an array of flags which indicate the locations of contiguous 650 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 651 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 652 Assume: sizes should be long enough to hold all the values. 653 */ 654 #undef __FUNCT__ 655 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 656 static int MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 657 { 658 int i,j,k,row; 659 PetscTruth flg; 660 661 PetscFunctionBegin; 662 for (i=0,j=0; i<n; j++) { 663 row = idx[i]; 664 if (row%bs!=0) { /* Not the begining of a block */ 665 sizes[j] = 1; 666 i++; 667 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 668 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 669 i++; 670 } else { /* Begining of the block, so check if the complete block exists */ 671 flg = PETSC_TRUE; 672 for (k=1; k<bs; k++) { 673 if (row+k != idx[i+k]) { /* break in the block */ 674 flg = PETSC_FALSE; 675 break; 676 } 677 } 678 if (flg == PETSC_TRUE) { /* No break in the bs */ 679 sizes[j] = bs; 680 i+= bs; 681 } else { 682 sizes[j] = 1; 683 i++; 684 } 685 } 686 } 687 *bs_max = j; 688 PetscFunctionReturn(0); 689 } 690 691 #undef __FUNCT__ 692 #define __FUNCT__ "MatZeroRows_SeqSBAIJ" 693 int MatZeroRows_SeqSBAIJ(Mat A,IS is,PetscScalar *diag) 694 { 695 Mat_SeqSBAIJ *sbaij=(Mat_SeqSBAIJ*)A->data; 696 int ierr,i,j,k,count,is_n,*is_idx,*rows; 697 int bs=sbaij->bs,bs2=sbaij->bs2,*sizes,row,bs_max; 698 PetscScalar zero = 0.0; 699 MatScalar *aa; 700 701 PetscFunctionBegin; 702 /* Make a copy of the IS and sort it */ 703 ierr = ISGetSize(is,&is_n);CHKERRQ(ierr); 704 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 705 706 /* allocate memory for rows,sizes */ 707 ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr); 708 sizes = rows + is_n; 709 710 /* initialize copy IS values to rows, and sort them */ 711 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 712 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 713 if (sbaij->keepzeroedrows) { /* do not change nonzero structure */ 714 for (i=0; i<is_n; i++) { sizes[i] = 1; } /* sizes: size of blocks, = 1 or bs */ 715 bs_max = is_n; /* bs_max: num. of contiguous block row in the row */ 716 } else { 717 ierr = MatZeroRows_SeqSBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 718 } 719 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 720 721 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 722 row = rows[j]; /* row to be zeroed */ 723 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row); 724 count = (sbaij->i[row/bs +1] - sbaij->i[row/bs])*bs; /* num. of elements in the row */ 725 aa = sbaij->a + sbaij->i[row/bs]*bs2 + (row%bs); 726 if (sizes[i] == bs && !sbaij->keepzeroedrows) { 727 if (diag) { 728 if (sbaij->ilen[row/bs] > 0) { 729 sbaij->ilen[row/bs] = 1; 730 sbaij->j[sbaij->i[row/bs]] = row/bs; 731 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 732 } 733 /* Now insert all the diagoanl values for this bs */ 734 for (k=0; k<bs; k++) { 735 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 736 } 737 } else { /* (!diag) */ 738 sbaij->ilen[row/bs] = 0; 739 } /* end (!diag) */ 740 } else { /* (sizes[i] != bs), broken block */ 741 #if defined (PETSC_USE_DEBUG) 742 if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1"); 743 #endif 744 for (k=0; k<count; k++) { 745 aa[0] = zero; 746 aa+=bs; 747 } 748 if (diag) { 749 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 750 } 751 } 752 } 753 754 ierr = PetscFree(rows);CHKERRQ(ierr); 755 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 756 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 757 PetscFunctionReturn(0); 758 } 759 760 /* Only add/insert a(i,j) with i<=j (blocks). 761 Any a(i,j) with i>j input by user is ingored. 762 */ 763 764 #undef __FUNCT__ 765 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 766 int MatSetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is) 767 { 768 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 769 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 770 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 771 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 772 int ridx,cidx,bs2=a->bs2,ierr; 773 MatScalar *ap,value,*aa=a->a,*bap; 774 775 PetscFunctionBegin; 776 777 for (k=0; k<m; k++) { /* loop over added rows */ 778 row = im[k]; /* row number */ 779 brow = row/bs; /* block row number */ 780 if (row < 0) continue; 781 #if defined(PETSC_USE_BOPT_g) 782 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m); 783 #endif 784 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 785 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 786 rmax = imax[brow]; /* maximum space allocated for this row */ 787 nrow = ailen[brow]; /* actual length of this row */ 788 low = 0; 789 790 for (l=0; l<n; l++) { /* loop over added columns */ 791 if (in[l] < 0) continue; 792 #if defined(PETSC_USE_BOPT_g) 793 if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m); 794 #endif 795 col = in[l]; 796 bcol = col/bs; /* block col number */ 797 798 if (brow <= bcol){ 799 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 800 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 801 /* element value a(k,l) */ 802 if (roworiented) { 803 value = v[l + k*n]; 804 } else { 805 value = v[k + l*m]; 806 } 807 808 /* move pointer bap to a(k,l) quickly and add/insert value */ 809 if (!sorted) low = 0; high = nrow; 810 while (high-low > 7) { 811 t = (low+high)/2; 812 if (rp[t] > bcol) high = t; 813 else low = t; 814 } 815 for (i=low; i<high; i++) { 816 /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */ 817 if (rp[i] > bcol) break; 818 if (rp[i] == bcol) { 819 bap = ap + bs2*i + bs*cidx + ridx; 820 if (is == ADD_VALUES) *bap += value; 821 else *bap = value; 822 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 823 if (brow == bcol && ridx < cidx){ 824 bap = ap + bs2*i + bs*ridx + cidx; 825 if (is == ADD_VALUES) *bap += value; 826 else *bap = value; 827 } 828 goto noinsert1; 829 } 830 } 831 832 if (nonew == 1) goto noinsert1; 833 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 834 if (nrow >= rmax) { 835 /* there is no extra room in row, therefore enlarge */ 836 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 837 MatScalar *new_a; 838 839 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 840 841 /* Malloc new storage space */ 842 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 843 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 844 new_j = (int*)(new_a + bs2*new_nz); 845 new_i = new_j + new_nz; 846 847 /* copy over old data into new slots */ 848 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 849 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 850 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 851 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 852 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 853 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 854 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 855 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 856 /* free up old matrix storage */ 857 ierr = PetscFree(a->a);CHKERRQ(ierr); 858 if (!a->singlemalloc) { 859 ierr = PetscFree(a->i);CHKERRQ(ierr); 860 ierr = PetscFree(a->j);CHKERRQ(ierr); 861 } 862 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 863 a->singlemalloc = PETSC_TRUE; 864 865 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 866 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 867 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 868 a->s_maxnz += bs2*CHUNKSIZE; 869 a->reallocs++; 870 a->s_nz++; 871 } 872 873 N = nrow++ - 1; 874 /* shift up all the later entries in this row */ 875 for (ii=N; ii>=i; ii--) { 876 rp[ii+1] = rp[ii]; 877 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 878 } 879 if (N>=i) { 880 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 881 } 882 rp[i] = bcol; 883 ap[bs2*i + bs*cidx + ridx] = value; 884 noinsert1:; 885 low = i; 886 /* } */ 887 } 888 } /* end of if .. if.. */ 889 } /* end of loop over added columns */ 890 ailen[brow] = nrow; 891 } /* end of loop over added rows */ 892 893 PetscFunctionReturn(0); 894 } 895 896 extern int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,PetscReal,Mat*); 897 extern int MatCholeskyFactor_SeqSBAIJ(Mat,IS,PetscReal); 898 extern int MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS*,int); 899 extern int MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*); 900 extern int MatGetSubMatrices_SeqSBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**); 901 extern int MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec); 902 extern int MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec); 903 extern int MatScale_SeqSBAIJ(PetscScalar*,Mat); 904 extern int MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *); 905 extern int MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*); 906 extern int MatGetDiagonal_SeqSBAIJ(Mat,Vec); 907 extern int MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec); 908 extern int MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *); 909 extern int MatZeroEntries_SeqSBAIJ(Mat); 910 extern int MatGetRowMax_SeqSBAIJ(Mat,Vec); 911 912 extern int MatSolve_SeqSBAIJ_N(Mat,Vec,Vec); 913 extern int MatSolve_SeqSBAIJ_1(Mat,Vec,Vec); 914 extern int MatSolve_SeqSBAIJ_2(Mat,Vec,Vec); 915 extern int MatSolve_SeqSBAIJ_3(Mat,Vec,Vec); 916 extern int MatSolve_SeqSBAIJ_4(Mat,Vec,Vec); 917 extern int MatSolve_SeqSBAIJ_5(Mat,Vec,Vec); 918 extern int MatSolve_SeqSBAIJ_6(Mat,Vec,Vec); 919 extern int MatSolve_SeqSBAIJ_7(Mat,Vec,Vec); 920 extern int MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec); 921 extern int MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec); 922 extern int MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec); 923 extern int MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec); 924 extern int MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec); 925 extern int MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec); 926 extern int MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec); 927 928 extern int MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec); 929 extern int MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec); 930 extern int MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec); 931 extern int MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 932 extern int MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec); 933 extern int MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec); 934 extern int MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec); 935 extern int MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec); 936 937 extern int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*); 938 extern int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*); 939 extern int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*); 940 extern int MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*); 941 extern int MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*); 942 extern int MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*); 943 extern int MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*); 944 extern int MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*); 945 946 extern int MatMult_SeqSBAIJ_1(Mat,Vec,Vec); 947 extern int MatMult_SeqSBAIJ_2(Mat,Vec,Vec); 948 extern int MatMult_SeqSBAIJ_3(Mat,Vec,Vec); 949 extern int MatMult_SeqSBAIJ_4(Mat,Vec,Vec); 950 extern int MatMult_SeqSBAIJ_5(Mat,Vec,Vec); 951 extern int MatMult_SeqSBAIJ_6(Mat,Vec,Vec); 952 extern int MatMult_SeqSBAIJ_7(Mat,Vec,Vec); 953 extern int MatMult_SeqSBAIJ_N(Mat,Vec,Vec); 954 955 extern int MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec); 956 extern int MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec); 957 extern int MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec); 958 extern int MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec); 959 extern int MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec); 960 extern int MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec); 961 extern int MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec); 962 extern int MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec); 963 964 #ifdef HAVE_MatICCFactor 965 /* modefied from MatILUFactor_SeqSBAIJ, needs further work! */ 966 #undef __FUNCT__ 967 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 968 int MatICCFactor_SeqSBAIJ(Mat inA,IS row,PetscReal fill,int level) 969 { 970 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 971 Mat outA; 972 int ierr; 973 PetscTruth row_identity,col_identity; 974 975 PetscFunctionBegin; 976 /* 977 if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 978 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 979 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 980 if (!row_identity || !col_identity) { 981 SETERRQ(1,"Row and column permutations must be identity for in-place ICC"); 982 } 983 */ 984 985 outA = inA; 986 inA->factor = FACTOR_CHOLESKY; 987 988 if (!a->diag) { 989 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 990 } 991 /* 992 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 993 for ILU(0) factorization with natural ordering 994 */ 995 switch (a->bs) { 996 case 1: 997 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 998 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 999 PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n"); 1000 case 2: 1001 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 1002 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1003 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1004 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1005 break; 1006 case 3: 1007 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 1008 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1009 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1010 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n"); 1011 break; 1012 case 4: 1013 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 1014 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1015 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1016 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1017 break; 1018 case 5: 1019 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 1020 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1021 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1022 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1023 break; 1024 case 6: 1025 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 1026 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1027 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1028 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1029 break; 1030 case 7: 1031 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 1032 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1033 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1034 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1035 break; 1036 default: 1037 a->row = row; 1038 a->icol = col; 1039 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1040 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1041 1042 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1043 ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr); 1044 PetscLogObjectParent(inA,a->icol); 1045 1046 if (!a->solve_work) { 1047 ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1048 PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar)); 1049 } 1050 } 1051 1052 ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr); 1053 1054 PetscFunctionReturn(0); 1055 } 1056 #endif 1057 1058 #undef __FUNCT__ 1059 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ" 1060 int MatPrintHelp_SeqSBAIJ(Mat A) 1061 { 1062 static PetscTruth called = PETSC_FALSE; 1063 MPI_Comm comm = A->comm; 1064 int ierr; 1065 1066 PetscFunctionBegin; 1067 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1068 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1069 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1070 PetscFunctionReturn(0); 1071 } 1072 1073 EXTERN_C_BEGIN 1074 #undef __FUNCT__ 1075 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1076 int MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices) 1077 { 1078 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1079 int i,nz,n; 1080 1081 PetscFunctionBegin; 1082 nz = baij->s_maxnz; 1083 n = mat->n; 1084 for (i=0; i<nz; i++) { 1085 baij->j[i] = indices[i]; 1086 } 1087 baij->s_nz = nz; 1088 for (i=0; i<n; i++) { 1089 baij->ilen[i] = baij->imax[i]; 1090 } 1091 1092 PetscFunctionReturn(0); 1093 } 1094 EXTERN_C_END 1095 1096 #undef __FUNCT__ 1097 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1098 /*@ 1099 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1100 in the matrix. 1101 1102 Input Parameters: 1103 + mat - the SeqSBAIJ matrix 1104 - indices - the column indices 1105 1106 Level: advanced 1107 1108 Notes: 1109 This can be called if you have precomputed the nonzero structure of the 1110 matrix and want to provide it to the matrix object to improve the performance 1111 of the MatSetValues() operation. 1112 1113 You MUST have set the correct numbers of nonzeros per row in the call to 1114 MatCreateSeqSBAIJ(). 1115 1116 MUST be called before any calls to MatSetValues() 1117 1118 .seealso: MatCreateSeqSBAIJ 1119 @*/ 1120 int MatSeqSBAIJSetColumnIndices(Mat mat,int *indices) 1121 { 1122 int ierr,(*f)(Mat,int *); 1123 1124 PetscFunctionBegin; 1125 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1126 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1127 if (f) { 1128 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1129 } else { 1130 SETERRQ(1,"Wrong type of matrix to set column indices"); 1131 } 1132 PetscFunctionReturn(0); 1133 } 1134 1135 #undef __FUNCT__ 1136 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1137 int MatSetUpPreallocation_SeqSBAIJ(Mat A) 1138 { 1139 int ierr; 1140 1141 PetscFunctionBegin; 1142 ierr = MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1143 PetscFunctionReturn(0); 1144 } 1145 1146 /* -------------------------------------------------------------------*/ 1147 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1148 MatGetRow_SeqSBAIJ, 1149 MatRestoreRow_SeqSBAIJ, 1150 MatMult_SeqSBAIJ_N, 1151 MatMultAdd_SeqSBAIJ_N, 1152 MatMultTranspose_SeqSBAIJ, 1153 MatMultTransposeAdd_SeqSBAIJ, 1154 MatSolve_SeqSBAIJ_N, 1155 0, 1156 0, 1157 0, 1158 0, 1159 MatCholeskyFactor_SeqSBAIJ, 1160 MatRelax_SeqSBAIJ, 1161 MatTranspose_SeqSBAIJ, 1162 MatGetInfo_SeqSBAIJ, 1163 MatEqual_SeqSBAIJ, 1164 MatGetDiagonal_SeqSBAIJ, 1165 MatDiagonalScale_SeqSBAIJ, 1166 MatNorm_SeqSBAIJ, 1167 0, 1168 MatAssemblyEnd_SeqSBAIJ, 1169 0, 1170 MatSetOption_SeqSBAIJ, 1171 MatZeroEntries_SeqSBAIJ, 1172 MatZeroRows_SeqSBAIJ, 1173 0, 1174 0, 1175 MatCholeskyFactorSymbolic_SeqSBAIJ, 1176 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1177 MatSetUpPreallocation_SeqSBAIJ, 1178 0, 1179 MatICCFactorSymbolic_SeqSBAIJ, 1180 0, 1181 0, 1182 MatDuplicate_SeqSBAIJ, 1183 0, 1184 0, 1185 0, 1186 0, 1187 0, 1188 MatGetSubMatrices_SeqSBAIJ, 1189 MatIncreaseOverlap_SeqSBAIJ, 1190 MatGetValues_SeqSBAIJ, 1191 0, 1192 MatPrintHelp_SeqSBAIJ, 1193 MatScale_SeqSBAIJ, 1194 0, 1195 0, 1196 0, 1197 MatGetBlockSize_SeqSBAIJ, 1198 MatGetRowIJ_SeqSBAIJ, 1199 MatRestoreRowIJ_SeqSBAIJ, 1200 0, 1201 0, 1202 0, 1203 0, 1204 0, 1205 0, 1206 MatSetValuesBlocked_SeqSBAIJ, 1207 MatGetSubMatrix_SeqSBAIJ, 1208 0, 1209 0, 1210 MatGetPetscMaps_Petsc, 1211 0, 1212 0, 1213 0, 1214 0, 1215 0, 1216 0, 1217 MatGetRowMax_SeqSBAIJ}; 1218 1219 EXTERN_C_BEGIN 1220 #undef __FUNCT__ 1221 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1222 int MatStoreValues_SeqSBAIJ(Mat mat) 1223 { 1224 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1225 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1226 int ierr; 1227 1228 PetscFunctionBegin; 1229 if (aij->nonew != 1) { 1230 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1231 } 1232 1233 /* allocate space for values if not already there */ 1234 if (!aij->saved_values) { 1235 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1236 } 1237 1238 /* copy values over */ 1239 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1240 PetscFunctionReturn(0); 1241 } 1242 EXTERN_C_END 1243 1244 EXTERN_C_BEGIN 1245 #undef __FUNCT__ 1246 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1247 int MatRetrieveValues_SeqSBAIJ(Mat mat) 1248 { 1249 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1250 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 1251 1252 PetscFunctionBegin; 1253 if (aij->nonew != 1) { 1254 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1255 } 1256 if (!aij->saved_values) { 1257 SETERRQ(1,"Must call MatStoreValues(A);first"); 1258 } 1259 1260 /* copy values over */ 1261 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1262 PetscFunctionReturn(0); 1263 } 1264 EXTERN_C_END 1265 1266 EXTERN_C_BEGIN 1267 #undef __FUNCT__ 1268 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1269 int MatCreate_SeqSBAIJ(Mat B) 1270 { 1271 Mat_SeqSBAIJ *b; 1272 int ierr,size; 1273 1274 PetscFunctionBegin; 1275 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1276 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1277 B->m = B->M = PetscMax(B->m,B->M); 1278 B->n = B->N = PetscMax(B->n,B->N); 1279 1280 ierr = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1281 B->data = (void*)b; 1282 ierr = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr); 1283 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1284 B->ops->destroy = MatDestroy_SeqSBAIJ; 1285 B->ops->view = MatView_SeqSBAIJ; 1286 B->factor = 0; 1287 B->lupivotthreshold = 1.0; 1288 B->mapping = 0; 1289 b->row = 0; 1290 b->icol = 0; 1291 b->reallocs = 0; 1292 b->saved_values = 0; 1293 1294 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1295 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1296 1297 b->sorted = PETSC_FALSE; 1298 b->roworiented = PETSC_TRUE; 1299 b->nonew = 0; 1300 b->diag = 0; 1301 b->solve_work = 0; 1302 b->mult_work = 0; 1303 B->spptr = 0; 1304 b->keepzeroedrows = PETSC_FALSE; 1305 1306 b->inew = 0; 1307 b->jnew = 0; 1308 b->anew = 0; 1309 b->a2anew = 0; 1310 b->permute = PETSC_FALSE; 1311 1312 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1313 "MatStoreValues_SeqSBAIJ", 1314 (void*)MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1315 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1316 "MatRetrieveValues_SeqSBAIJ", 1317 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1318 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1319 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1320 (void*)MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1321 PetscFunctionReturn(0); 1322 } 1323 EXTERN_C_END 1324 1325 #undef __FUNCT__ 1326 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1327 /*@C 1328 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1329 compressed row) format. For good matrix assembly performance the 1330 user should preallocate the matrix storage by setting the parameter nz 1331 (or the array nnz). By setting these parameters accurately, performance 1332 during matrix assembly can be increased by more than a factor of 50. 1333 1334 Collective on Mat 1335 1336 Input Parameters: 1337 + A - the symmetric matrix 1338 . bs - size of block 1339 . nz - number of block nonzeros per block row (same for all rows) 1340 - nnz - array containing the number of block nonzeros in the upper triangular plus 1341 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1342 1343 Options Database Keys: 1344 . -mat_no_unroll - uses code that does not unroll the loops in the 1345 block calculations (much slower) 1346 . -mat_block_size - size of the blocks to use 1347 1348 Level: intermediate 1349 1350 Notes: 1351 Specify the preallocated storage with either nz or nnz (not both). 1352 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1353 allocation. For additional details, see the users manual chapter on 1354 matrices. 1355 1356 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1357 @*/ 1358 int MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz) 1359 { 1360 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1361 int i,len,ierr,mbs,bs2; 1362 PetscTruth flg; 1363 int s_nz; 1364 1365 PetscFunctionBegin; 1366 B->preallocated = PETSC_TRUE; 1367 ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1368 mbs = B->m/bs; 1369 bs2 = bs*bs; 1370 1371 if (mbs*bs != B->m) { 1372 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1373 } 1374 1375 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1376 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1377 if (nnz) { 1378 for (i=0; i<mbs; i++) { 1379 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1380 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],mbs); 1381 } 1382 } 1383 1384 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1385 if (!flg) { 1386 switch (bs) { 1387 case 1: 1388 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1389 B->ops->solve = MatSolve_SeqSBAIJ_1; 1390 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_1; 1391 B->ops->mult = MatMult_SeqSBAIJ_1; 1392 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1393 break; 1394 case 2: 1395 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1396 B->ops->solve = MatSolve_SeqSBAIJ_2; 1397 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_2; 1398 B->ops->mult = MatMult_SeqSBAIJ_2; 1399 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1400 break; 1401 case 3: 1402 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1403 B->ops->solve = MatSolve_SeqSBAIJ_3; 1404 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_3; 1405 B->ops->mult = MatMult_SeqSBAIJ_3; 1406 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1407 break; 1408 case 4: 1409 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1410 B->ops->solve = MatSolve_SeqSBAIJ_4; 1411 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_4; 1412 B->ops->mult = MatMult_SeqSBAIJ_4; 1413 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1414 break; 1415 case 5: 1416 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1417 B->ops->solve = MatSolve_SeqSBAIJ_5; 1418 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_5; 1419 B->ops->mult = MatMult_SeqSBAIJ_5; 1420 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1421 break; 1422 case 6: 1423 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1424 B->ops->solve = MatSolve_SeqSBAIJ_6; 1425 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_6; 1426 B->ops->mult = MatMult_SeqSBAIJ_6; 1427 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1428 break; 1429 case 7: 1430 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1431 B->ops->solve = MatSolve_SeqSBAIJ_7; 1432 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_7; 1433 B->ops->mult = MatMult_SeqSBAIJ_7; 1434 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1435 break; 1436 } 1437 } 1438 1439 b->mbs = mbs; 1440 b->nbs = mbs; 1441 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 1442 if (!nnz) { 1443 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1444 else if (nz <= 0) nz = 1; 1445 for (i=0; i<mbs; i++) { 1446 b->imax[i] = nz; 1447 } 1448 nz = nz*mbs; /* total nz */ 1449 } else { 1450 nz = 0; 1451 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1452 } 1453 /* s_nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1454 s_nz = nz; 1455 1456 /* allocate the matrix space */ 1457 len = s_nz*sizeof(int) + s_nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 1458 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1459 ierr = PetscMemzero(b->a,s_nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1460 b->j = (int*)(b->a + s_nz*bs2); 1461 ierr = PetscMemzero(b->j,s_nz*sizeof(int));CHKERRQ(ierr); 1462 b->i = b->j + s_nz; 1463 b->singlemalloc = PETSC_TRUE; 1464 1465 /* pointer to beginning of each row */ 1466 b->i[0] = 0; 1467 for (i=1; i<mbs+1; i++) { 1468 b->i[i] = b->i[i-1] + b->imax[i-1]; 1469 } 1470 1471 /* b->ilen will count nonzeros in each block row so far. */ 1472 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 1473 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1474 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1475 1476 b->bs = bs; 1477 b->bs2 = bs2; 1478 b->s_nz = 0; 1479 b->s_maxnz = s_nz*bs2; 1480 1481 b->inew = 0; 1482 b->jnew = 0; 1483 b->anew = 0; 1484 b->a2anew = 0; 1485 b->permute = PETSC_FALSE; 1486 PetscFunctionReturn(0); 1487 } 1488 1489 1490 #undef __FUNCT__ 1491 #define __FUNCT__ "MatCreateSeqSBAIJ" 1492 /*@C 1493 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1494 compressed row) format. For good matrix assembly performance the 1495 user should preallocate the matrix storage by setting the parameter nz 1496 (or the array nnz). By setting these parameters accurately, performance 1497 during matrix assembly can be increased by more than a factor of 50. 1498 1499 Collective on MPI_Comm 1500 1501 Input Parameters: 1502 + comm - MPI communicator, set to PETSC_COMM_SELF 1503 . bs - size of block 1504 . m - number of rows, or number of columns 1505 . nz - number of block nonzeros per block row (same for all rows) 1506 - nnz - array containing the number of block nonzeros in the upper triangular plus 1507 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1508 1509 Output Parameter: 1510 . A - the symmetric matrix 1511 1512 Options Database Keys: 1513 . -mat_no_unroll - uses code that does not unroll the loops in the 1514 block calculations (much slower) 1515 . -mat_block_size - size of the blocks to use 1516 1517 Level: intermediate 1518 1519 Notes: 1520 1521 Specify the preallocated storage with either nz or nnz (not both). 1522 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1523 allocation. For additional details, see the users manual chapter on 1524 matrices. 1525 1526 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1527 @*/ 1528 int MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A) 1529 { 1530 int ierr; 1531 1532 PetscFunctionBegin; 1533 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1534 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1535 ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1536 PetscFunctionReturn(0); 1537 } 1538 1539 #undef __FUNCT__ 1540 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1541 int MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1542 { 1543 Mat C; 1544 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1545 int i,len,mbs = a->mbs,nz = a->s_nz,bs2 =a->bs2,ierr; 1546 1547 PetscFunctionBegin; 1548 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1549 1550 *B = 0; 1551 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1552 ierr = MatSetType(C,MATSEQSBAIJ);CHKERRQ(ierr); 1553 c = (Mat_SeqSBAIJ*)C->data; 1554 1555 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1556 C->preallocated = PETSC_TRUE; 1557 C->factor = A->factor; 1558 c->row = 0; 1559 c->icol = 0; 1560 c->saved_values = 0; 1561 c->keepzeroedrows = a->keepzeroedrows; 1562 C->assembled = PETSC_TRUE; 1563 1564 c->bs = a->bs; 1565 c->bs2 = a->bs2; 1566 c->mbs = a->mbs; 1567 c->nbs = a->nbs; 1568 1569 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1570 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1571 for (i=0; i<mbs; i++) { 1572 c->imax[i] = a->imax[i]; 1573 c->ilen[i] = a->ilen[i]; 1574 } 1575 1576 /* allocate the matrix space */ 1577 c->singlemalloc = PETSC_TRUE; 1578 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1579 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1580 c->j = (int*)(c->a + nz*bs2); 1581 c->i = c->j + nz; 1582 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1583 if (mbs > 0) { 1584 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1585 if (cpvalues == MAT_COPY_VALUES) { 1586 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1587 } else { 1588 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1589 } 1590 } 1591 1592 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1593 c->sorted = a->sorted; 1594 c->roworiented = a->roworiented; 1595 c->nonew = a->nonew; 1596 1597 if (a->diag) { 1598 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1599 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1600 for (i=0; i<mbs; i++) { 1601 c->diag[i] = a->diag[i]; 1602 } 1603 } else c->diag = 0; 1604 c->s_nz = a->s_nz; 1605 c->s_maxnz = a->s_maxnz; 1606 c->solve_work = 0; 1607 C->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1608 c->mult_work = 0; 1609 *B = C; 1610 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1611 PetscFunctionReturn(0); 1612 } 1613 1614 EXTERN_C_BEGIN 1615 #undef __FUNCT__ 1616 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1617 int MatLoad_SeqSBAIJ(PetscViewer viewer,MatType type,Mat *A) 1618 { 1619 Mat_SeqSBAIJ *a; 1620 Mat B; 1621 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1622 int *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 1623 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1624 int *masked,nmask,tmp,bs2,ishift; 1625 PetscScalar *aa; 1626 MPI_Comm comm = ((PetscObject)viewer)->comm; 1627 1628 PetscFunctionBegin; 1629 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1630 bs2 = bs*bs; 1631 1632 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1633 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1634 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1635 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1636 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1637 M = header[1]; N = header[2]; nz = header[3]; 1638 1639 if (header[3] < 0) { 1640 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1641 } 1642 1643 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1644 1645 /* 1646 This code adds extra rows to make sure the number of rows is 1647 divisible by the blocksize 1648 */ 1649 mbs = M/bs; 1650 extra_rows = bs - M + bs*(mbs); 1651 if (extra_rows == bs) extra_rows = 0; 1652 else mbs++; 1653 if (extra_rows) { 1654 PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n"); 1655 } 1656 1657 /* read in row lengths */ 1658 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1659 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1660 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1661 1662 /* read in column indices */ 1663 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1664 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1665 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1666 1667 /* loop over row lengths determining block row lengths */ 1668 ierr = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr); 1669 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1670 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1671 masked = mask + mbs; 1672 rowcount = 0; nzcount = 0; 1673 for (i=0; i<mbs; i++) { 1674 nmask = 0; 1675 for (j=0; j<bs; j++) { 1676 kmax = rowlengths[rowcount]; 1677 for (k=0; k<kmax; k++) { 1678 tmp = jj[nzcount++]/bs; /* block col. index */ 1679 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1680 } 1681 rowcount++; 1682 } 1683 s_browlengths[i] += nmask; 1684 1685 /* zero out the mask elements we set */ 1686 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1687 } 1688 1689 /* create our matrix */ 1690 ierr = MatCreateSeqSBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,s_browlengths,A);CHKERRQ(ierr); 1691 B = *A; 1692 a = (Mat_SeqSBAIJ*)B->data; 1693 1694 /* set matrix "i" values */ 1695 a->i[0] = 0; 1696 for (i=1; i<= mbs; i++) { 1697 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1698 a->ilen[i-1] = s_browlengths[i-1]; 1699 } 1700 a->s_nz = a->i[mbs]; 1701 1702 /* read in nonzero values */ 1703 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1704 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1705 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1706 1707 /* set "a" and "j" values into matrix */ 1708 nzcount = 0; jcount = 0; 1709 for (i=0; i<mbs; i++) { 1710 nzcountb = nzcount; 1711 nmask = 0; 1712 for (j=0; j<bs; j++) { 1713 kmax = rowlengths[i*bs+j]; 1714 for (k=0; k<kmax; k++) { 1715 tmp = jj[nzcount++]/bs; /* block col. index */ 1716 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1717 } 1718 } 1719 /* sort the masked values */ 1720 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1721 1722 /* set "j" values into matrix */ 1723 maskcount = 1; 1724 for (j=0; j<nmask; j++) { 1725 a->j[jcount++] = masked[j]; 1726 mask[masked[j]] = maskcount++; 1727 } 1728 1729 /* set "a" values into matrix */ 1730 ishift = bs2*a->i[i]; 1731 for (j=0; j<bs; j++) { 1732 kmax = rowlengths[i*bs+j]; 1733 for (k=0; k<kmax; k++) { 1734 tmp = jj[nzcountb]/bs ; /* block col. index */ 1735 if (tmp >= i){ 1736 block = mask[tmp] - 1; 1737 point = jj[nzcountb] - bs*tmp; 1738 idx = ishift + bs2*block + j + bs*point; 1739 a->a[idx] = aa[nzcountb]; 1740 } 1741 nzcountb++; 1742 } 1743 } 1744 /* zero out the mask elements we set */ 1745 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1746 } 1747 if (jcount != a->s_nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1748 1749 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1750 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1751 ierr = PetscFree(aa);CHKERRQ(ierr); 1752 ierr = PetscFree(jj);CHKERRQ(ierr); 1753 ierr = PetscFree(mask);CHKERRQ(ierr); 1754 1755 B->assembled = PETSC_TRUE; 1756 ierr = MatView_Private(B);CHKERRQ(ierr); 1757 PetscFunctionReturn(0); 1758 } 1759 EXTERN_C_END 1760 1761 #undef __FUNCT__ 1762 #define __FUNCT__ "MatRelax_SeqSBAIJ" 1763 int MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 1764 { 1765 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1766 MatScalar *aa=a->a,*v,*v1; 1767 PetscScalar *x,*b,*t,sum,d; 1768 int m=a->mbs,bs=a->bs,*ai=a->i,*aj=a->j,ierr; 1769 int nz,nz1,*vj,*vj1,i; 1770 1771 PetscFunctionBegin; 1772 its = its*lits; 1773 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 1774 1775 if (bs > 1) 1776 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 1777 1778 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1779 if (xx != bb) { 1780 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1781 } else { 1782 b = x; 1783 } 1784 1785 ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr); 1786 1787 if (flag & SOR_ZERO_INITIAL_GUESS) { 1788 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1789 for (i=0; i<m; i++) 1790 t[i] = b[i]; 1791 1792 for (i=0; i<m; i++){ 1793 d = *(aa + ai[i]); /* diag[i] */ 1794 v = aa + ai[i] + 1; 1795 vj = aj + ai[i] + 1; 1796 nz = ai[i+1] - ai[i] - 1; 1797 x[i] = omega*t[i]/d; 1798 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 1799 PetscLogFlops(2*nz-1); 1800 } 1801 } 1802 1803 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 1804 for (i=0; i<m; i++) 1805 t[i] = b[i]; 1806 1807 for (i=0; i<m-1; i++){ /* update rhs */ 1808 v = aa + ai[i] + 1; 1809 vj = aj + ai[i] + 1; 1810 nz = ai[i+1] - ai[i] - 1; 1811 while (nz--) t[*vj++] -= x[i]*(*v++); 1812 PetscLogFlops(2*nz-1); 1813 } 1814 for (i=m-1; i>=0; i--){ 1815 d = *(aa + ai[i]); 1816 v = aa + ai[i] + 1; 1817 vj = aj + ai[i] + 1; 1818 nz = ai[i+1] - ai[i] - 1; 1819 sum = t[i]; 1820 while (nz--) sum -= x[*vj++]*(*v++); 1821 PetscLogFlops(2*nz-1); 1822 x[i] = (1-omega)*x[i] + omega*sum/d; 1823 } 1824 } 1825 its--; 1826 } 1827 1828 while (its--) { 1829 /* 1830 forward sweep: 1831 for i=0,...,m-1: 1832 sum[i] = (b[i] - U(i,:)x )/d[i]; 1833 x[i] = (1-omega)x[i] + omega*sum[i]; 1834 b = b - x[i]*U^T(i,:); 1835 1836 */ 1837 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1838 for (i=0; i<m; i++) 1839 t[i] = b[i]; 1840 1841 for (i=0; i<m; i++){ 1842 d = *(aa + ai[i]); /* diag[i] */ 1843 v = aa + ai[i] + 1; v1=v; 1844 vj = aj + ai[i] + 1; vj1=vj; 1845 nz = ai[i+1] - ai[i] - 1; nz1=nz; 1846 sum = t[i]; 1847 while (nz1--) sum -= (*v1++)*x[*vj1++]; 1848 x[i] = (1-omega)*x[i] + omega*sum/d; 1849 while (nz--) t[*vj++] -= x[i]*(*v++); 1850 PetscLogFlops(4*nz-2); 1851 } 1852 } 1853 1854 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 1855 /* 1856 backward sweep: 1857 b = b - x[i]*U^T(i,:), i=0,...,n-2 1858 for i=m-1,...,0: 1859 sum[i] = (b[i] - U(i,:)x )/d[i]; 1860 x[i] = (1-omega)x[i] + omega*sum[i]; 1861 */ 1862 for (i=0; i<m; i++) 1863 t[i] = b[i]; 1864 1865 for (i=0; i<m-1; i++){ /* update rhs */ 1866 v = aa + ai[i] + 1; 1867 vj = aj + ai[i] + 1; 1868 nz = ai[i+1] - ai[i] - 1; 1869 while (nz--) t[*vj++] -= x[i]*(*v++); 1870 PetscLogFlops(2*nz-1); 1871 } 1872 for (i=m-1; i>=0; i--){ 1873 d = *(aa + ai[i]); 1874 v = aa + ai[i] + 1; 1875 vj = aj + ai[i] + 1; 1876 nz = ai[i+1] - ai[i] - 1; 1877 sum = t[i]; 1878 while (nz--) sum -= x[*vj++]*(*v++); 1879 PetscLogFlops(2*nz-1); 1880 x[i] = (1-omega)*x[i] + omega*sum/d; 1881 } 1882 } 1883 } 1884 1885 ierr = PetscFree(t); CHKERRQ(ierr); 1886 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1887 if (bb != xx) { 1888 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1889 } 1890 1891 PetscFunctionReturn(0); 1892 } 1893 1894 1895 1896 1897 1898 1899