1 /*$Id: sbaij.c,v 1.62 2001/08/07 03:03:01 balay Exp $*/ 2 3 /* 4 Defines the basic matrix operations for the SBAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 8 #include "src/vec/vecimpl.h" 9 #include "src/inline/spops.h" 10 #include "src/mat/impls/sbaij/seq/sbaij.h" 11 12 #define CHUNKSIZE 10 13 14 /* 15 Checks for missing diagonals 16 */ 17 #undef __FUNCT__ 18 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 19 int MatMissingDiagonal_SeqSBAIJ(Mat A) 20 { 21 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 22 int *diag,*jj = a->j,i,ierr; 23 24 PetscFunctionBegin; 25 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 26 diag = a->diag; 27 for (i=0; i<a->mbs; i++) { 28 if (jj[diag[i]] != i) SETERRQ1(1,"Matrix is missing diagonal number %d",i); 29 } 30 PetscFunctionReturn(0); 31 } 32 33 #undef __FUNCT__ 34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 35 int MatMarkDiagonal_SeqSBAIJ(Mat A) 36 { 37 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 38 int i,mbs = a->mbs,ierr; 39 40 PetscFunctionBegin; 41 if (a->diag) PetscFunctionReturn(0); 42 43 ierr = PetscMalloc((mbs+1)*sizeof(int),&a->diag);CHKERRQ(ierr); 44 PetscLogObjectMemory(A,(mbs+1)*sizeof(int)); 45 for (i=0; i<mbs; i++) a->diag[i] = a->i[i]; 46 PetscFunctionReturn(0); 47 } 48 49 /* extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); */ 50 51 #undef __FUNCT__ 52 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 53 static int MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 54 { 55 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 56 57 PetscFunctionBegin; 58 59 if (ia) { 60 SETERRQ(1,"Function not yet written for SBAIJ format, only supports natural ordering"); 61 } 62 63 #ifdef NEW 64 int ierr; 65 66 if (!ia) PetscFunctionReturn(0); 67 68 /* 69 if (symmetric) { 70 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 71 } else if (oshift == 1) { 72 int nz = a->i[n]; 73 for (i=0; i<nz; i++) a->j[i]++; 74 for (i=0; i<n+1; i++) a->i[i]++; 75 *ia = a->i; *ja = a->j; 76 } else { 77 */ 78 *ia = a->i; *ja = a->j; 79 /* } */ 80 #endif 81 *nn = a->mbs; 82 PetscFunctionReturn(0); 83 } 84 85 #undef __FUNCT__ 86 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 87 static int MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 88 { 89 PetscFunctionBegin; 90 if (!ia) PetscFunctionReturn(0); 91 SETERRQ(1,"Function not yet written for SBAIJ format, only supports natural ordering"); 92 /* PetscFunctionReturn(0); */ 93 } 94 95 #undef __FUNCT__ 96 #define __FUNCT__ "MatGetBlockSize_SeqSBAIJ" 97 int MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs) 98 { 99 Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data; 100 101 PetscFunctionBegin; 102 *bs = sbaij->bs; 103 PetscFunctionReturn(0); 104 } 105 106 #undef __FUNCT__ 107 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 108 int MatDestroy_SeqSBAIJ(Mat A) 109 { 110 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 111 int ierr; 112 113 PetscFunctionBegin; 114 #if defined(PETSC_USE_LOG) 115 PetscLogObjectState((PetscObject)A,"Rows=%d, s_NZ=%d",A->m,a->s_nz); 116 #endif 117 ierr = PetscFree(a->a);CHKERRQ(ierr); 118 if (!a->singlemalloc) { 119 ierr = PetscFree(a->i);CHKERRQ(ierr); 120 ierr = PetscFree(a->j);CHKERRQ(ierr); 121 } 122 if (a->row) { 123 ierr = ISDestroy(a->row);CHKERRQ(ierr); 124 } 125 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 126 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 127 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 128 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 129 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 130 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 131 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 132 133 if (a->inew){ 134 ierr = PetscFree(a->inew);CHKERRQ(ierr); 135 a->inew = 0; 136 } 137 ierr = PetscFree(a);CHKERRQ(ierr); 138 PetscFunctionReturn(0); 139 } 140 141 #undef __FUNCT__ 142 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 143 int MatSetOption_SeqSBAIJ(Mat A,MatOption op) 144 { 145 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 146 147 PetscFunctionBegin; 148 switch (op) { 149 case MAT_ROW_ORIENTED: 150 a->roworiented = PETSC_TRUE; 151 break; 152 case MAT_COLUMN_ORIENTED: 153 a->roworiented = PETSC_FALSE; 154 break; 155 case MAT_COLUMNS_SORTED: 156 a->sorted = PETSC_TRUE; 157 break; 158 case MAT_COLUMNS_UNSORTED: 159 a->sorted = PETSC_FALSE; 160 break; 161 case MAT_KEEP_ZEROED_ROWS: 162 a->keepzeroedrows = PETSC_TRUE; 163 break; 164 case MAT_NO_NEW_NONZERO_LOCATIONS: 165 a->nonew = 1; 166 break; 167 case MAT_NEW_NONZERO_LOCATION_ERR: 168 a->nonew = -1; 169 break; 170 case MAT_NEW_NONZERO_ALLOCATION_ERR: 171 a->nonew = -2; 172 break; 173 case MAT_YES_NEW_NONZERO_LOCATIONS: 174 a->nonew = 0; 175 break; 176 case MAT_ROWS_SORTED: 177 case MAT_ROWS_UNSORTED: 178 case MAT_YES_NEW_DIAGONALS: 179 case MAT_IGNORE_OFF_PROC_ENTRIES: 180 case MAT_USE_HASH_TABLE: 181 case MAT_USE_SINGLE_PRECISION_SOLVES: 182 PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n"); 183 break; 184 case MAT_NO_NEW_DIAGONALS: 185 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 186 default: 187 SETERRQ(PETSC_ERR_SUP,"unknown option"); 188 } 189 PetscFunctionReturn(0); 190 } 191 192 #undef __FUNCT__ 193 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 194 int MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,PetscScalar **v) 195 { 196 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 197 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2,ierr; 198 MatScalar *aa,*aa_i; 199 PetscScalar *v_i; 200 201 PetscFunctionBegin; 202 bs = a->bs; 203 ai = a->i; 204 aj = a->j; 205 aa = a->a; 206 bs2 = a->bs2; 207 208 if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range"); 209 210 bn = row/bs; /* Block number */ 211 bp = row % bs; /* Block position */ 212 M = ai[bn+1] - ai[bn]; 213 *ncols = bs*M; 214 215 if (v) { 216 *v = 0; 217 if (*ncols) { 218 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 219 for (i=0; i<M; i++) { /* for each block in the block row */ 220 v_i = *v + i*bs; 221 aa_i = aa + bs2*(ai[bn] + i); 222 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 223 } 224 } 225 } 226 227 if (cols) { 228 *cols = 0; 229 if (*ncols) { 230 ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr); 231 for (i=0; i<M; i++) { /* for each block in the block row */ 232 cols_i = *cols + i*bs; 233 itmp = bs*aj[ai[bn] + i]; 234 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 235 } 236 } 237 } 238 239 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 240 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 241 #ifdef column_search 242 v_i = *v + M*bs; 243 cols_i = *cols + M*bs; 244 for (i=0; i<bn; i++){ /* for each block row */ 245 M = ai[i+1] - ai[i]; 246 for (j=0; j<M; j++){ 247 itmp = aj[ai[i] + j]; /* block column value */ 248 if (itmp == bn){ 249 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 250 for (k=0; k<bs; k++) { 251 *cols_i++ = i*bs+k; 252 *v_i++ = aa_i[k]; 253 } 254 *ncols += bs; 255 break; 256 } 257 } 258 } 259 #endif 260 261 PetscFunctionReturn(0); 262 } 263 264 #undef __FUNCT__ 265 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 266 int MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 267 { 268 int ierr; 269 270 PetscFunctionBegin; 271 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 272 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 273 PetscFunctionReturn(0); 274 } 275 276 #undef __FUNCT__ 277 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 278 int MatTranspose_SeqSBAIJ(Mat A,Mat *B) 279 { 280 int ierr; 281 PetscFunctionBegin; 282 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 283 PetscFunctionReturn(0); 284 } 285 286 #undef __FUNCT__ 287 #define __FUNCT__ "MatView_SeqSBAIJ_Binary" 288 static int MatView_SeqSBAIJ_Binary(Mat A,PetscViewer viewer) 289 { 290 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 291 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 292 PetscScalar *aa; 293 FILE *file; 294 295 PetscFunctionBegin; 296 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 297 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 298 col_lens[0] = MAT_FILE_COOKIE; 299 300 col_lens[1] = A->m; 301 col_lens[2] = A->m; 302 col_lens[3] = a->s_nz*bs2; 303 304 /* store lengths of each row and write (including header) to file */ 305 count = 0; 306 for (i=0; i<a->mbs; i++) { 307 for (j=0; j<bs; j++) { 308 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 309 } 310 } 311 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 312 ierr = PetscFree(col_lens);CHKERRQ(ierr); 313 314 /* store column indices (zero start index) */ 315 ierr = PetscMalloc((a->s_nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 316 count = 0; 317 for (i=0; i<a->mbs; i++) { 318 for (j=0; j<bs; j++) { 319 for (k=a->i[i]; k<a->i[i+1]; k++) { 320 for (l=0; l<bs; l++) { 321 jj[count++] = bs*a->j[k] + l; 322 } 323 } 324 } 325 } 326 ierr = PetscBinaryWrite(fd,jj,bs2*a->s_nz,PETSC_INT,0);CHKERRQ(ierr); 327 ierr = PetscFree(jj);CHKERRQ(ierr); 328 329 /* store nonzero values */ 330 ierr = PetscMalloc((a->s_nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 331 count = 0; 332 for (i=0; i<a->mbs; i++) { 333 for (j=0; j<bs; j++) { 334 for (k=a->i[i]; k<a->i[i+1]; k++) { 335 for (l=0; l<bs; l++) { 336 aa[count++] = a->a[bs2*k + l*bs + j]; 337 } 338 } 339 } 340 } 341 ierr = PetscBinaryWrite(fd,aa,bs2*a->s_nz,PETSC_SCALAR,0);CHKERRQ(ierr); 342 ierr = PetscFree(aa);CHKERRQ(ierr); 343 344 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 345 if (file) { 346 fprintf(file,"-matload_block_size %d\n",a->bs); 347 } 348 PetscFunctionReturn(0); 349 } 350 351 #undef __FUNCT__ 352 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 353 static int MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 354 { 355 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 356 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 357 char *name; 358 PetscViewerFormat format; 359 360 PetscFunctionBegin; 361 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 362 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 363 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 364 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 365 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 366 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 367 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 368 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 369 for (i=0; i<a->mbs; i++) { 370 for (j=0; j<bs; j++) { 371 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 372 for (k=a->i[i]; k<a->i[i+1]; k++) { 373 for (l=0; l<bs; l++) { 374 #if defined(PETSC_USE_COMPLEX) 375 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 376 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 377 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 378 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 379 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 380 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 381 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 382 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 383 } 384 #else 385 if (a->a[bs2*k + l*bs + j] != 0.0) { 386 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 387 } 388 #endif 389 } 390 } 391 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 392 } 393 } 394 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 395 } else { 396 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 397 for (i=0; i<a->mbs; i++) { 398 for (j=0; j<bs; j++) { 399 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 400 for (k=a->i[i]; k<a->i[i+1]; k++) { 401 for (l=0; l<bs; l++) { 402 #if defined(PETSC_USE_COMPLEX) 403 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 404 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 405 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 406 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 407 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 408 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 409 } else { 410 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 411 } 412 #else 413 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 414 #endif 415 } 416 } 417 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 418 } 419 } 420 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 421 } 422 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 423 PetscFunctionReturn(0); 424 } 425 426 #undef __FUNCT__ 427 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 428 static int MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 429 { 430 Mat A = (Mat) Aa; 431 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 432 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank; 433 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 434 MatScalar *aa; 435 MPI_Comm comm; 436 PetscViewer viewer; 437 438 PetscFunctionBegin; 439 /* 440 This is nasty. If this is called from an originally parallel matrix 441 then all processes call this,but only the first has the matrix so the 442 rest should return immediately. 443 */ 444 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 445 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 446 if (rank) PetscFunctionReturn(0); 447 448 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 449 450 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 451 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 452 453 /* loop over matrix elements drawing boxes */ 454 color = PETSC_DRAW_BLUE; 455 for (i=0,row=0; i<mbs; i++,row+=bs) { 456 for (j=a->i[i]; j<a->i[i+1]; j++) { 457 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 458 x_l = a->j[j]*bs; x_r = x_l + 1.0; 459 aa = a->a + j*bs2; 460 for (k=0; k<bs; k++) { 461 for (l=0; l<bs; l++) { 462 if (PetscRealPart(*aa++) >= 0.) continue; 463 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 464 } 465 } 466 } 467 } 468 color = PETSC_DRAW_CYAN; 469 for (i=0,row=0; i<mbs; i++,row+=bs) { 470 for (j=a->i[i]; j<a->i[i+1]; j++) { 471 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 472 x_l = a->j[j]*bs; x_r = x_l + 1.0; 473 aa = a->a + j*bs2; 474 for (k=0; k<bs; k++) { 475 for (l=0; l<bs; l++) { 476 if (PetscRealPart(*aa++) != 0.) continue; 477 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 478 } 479 } 480 } 481 } 482 483 color = PETSC_DRAW_RED; 484 for (i=0,row=0; i<mbs; i++,row+=bs) { 485 for (j=a->i[i]; j<a->i[i+1]; j++) { 486 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 487 x_l = a->j[j]*bs; x_r = x_l + 1.0; 488 aa = a->a + j*bs2; 489 for (k=0; k<bs; k++) { 490 for (l=0; l<bs; l++) { 491 if (PetscRealPart(*aa++) <= 0.) continue; 492 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 493 } 494 } 495 } 496 } 497 PetscFunctionReturn(0); 498 } 499 500 #undef __FUNCT__ 501 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 502 static int MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 503 { 504 int ierr; 505 PetscReal xl,yl,xr,yr,w,h; 506 PetscDraw draw; 507 PetscTruth isnull; 508 509 PetscFunctionBegin; 510 511 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 512 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 513 514 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 515 xr = A->m; yr = A->m; h = yr/10.0; w = xr/10.0; 516 xr += w; yr += h; xl = -w; yl = -h; 517 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 518 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 519 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 520 PetscFunctionReturn(0); 521 } 522 523 #undef __FUNCT__ 524 #define __FUNCT__ "MatView_SeqSBAIJ" 525 int MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 526 { 527 int ierr; 528 PetscTruth issocket,isascii,isbinary,isdraw; 529 530 PetscFunctionBegin; 531 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 532 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 533 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 534 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 535 if (issocket) { 536 SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 537 } else if (isascii){ 538 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 539 } else if (isbinary) { 540 ierr = MatView_SeqSBAIJ_Binary(A,viewer);CHKERRQ(ierr); 541 } else if (isdraw) { 542 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 543 } else { 544 SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name); 545 } 546 PetscFunctionReturn(0); 547 } 548 549 550 #undef __FUNCT__ 551 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 552 int MatGetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v) 553 { 554 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 555 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 556 int *ai = a->i,*ailen = a->ilen; 557 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 558 MatScalar *ap,*aa = a->a,zero = 0.0; 559 560 PetscFunctionBegin; 561 for (k=0; k<m; k++) { /* loop over rows */ 562 row = im[k]; brow = row/bs; 563 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 564 if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 565 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 566 nrow = ailen[brow]; 567 for (l=0; l<n; l++) { /* loop over columns */ 568 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 569 if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 570 col = in[l] ; 571 bcol = col/bs; 572 cidx = col%bs; 573 ridx = row%bs; 574 high = nrow; 575 low = 0; /* assume unsorted */ 576 while (high-low > 5) { 577 t = (low+high)/2; 578 if (rp[t] > bcol) high = t; 579 else low = t; 580 } 581 for (i=low; i<high; i++) { 582 if (rp[i] > bcol) break; 583 if (rp[i] == bcol) { 584 *v++ = ap[bs2*i+bs*cidx+ridx]; 585 goto finished; 586 } 587 } 588 *v++ = zero; 589 finished:; 590 } 591 } 592 PetscFunctionReturn(0); 593 } 594 595 596 #undef __FUNCT__ 597 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 598 int MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is) 599 { 600 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 601 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 602 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 603 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 604 PetscTruth roworiented=a->roworiented; 605 MatScalar *value = v,*ap,*aa = a->a,*bap; 606 607 PetscFunctionBegin; 608 if (roworiented) { 609 stepval = (n-1)*bs; 610 } else { 611 stepval = (m-1)*bs; 612 } 613 for (k=0; k<m; k++) { /* loop over added rows */ 614 row = im[k]; 615 if (row < 0) continue; 616 #if defined(PETSC_USE_BOPT_g) 617 if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 618 #endif 619 rp = aj + ai[row]; 620 ap = aa + bs2*ai[row]; 621 rmax = imax[row]; 622 nrow = ailen[row]; 623 low = 0; 624 for (l=0; l<n; l++) { /* loop over added columns */ 625 if (in[l] < 0) continue; 626 #if defined(PETSC_USE_BOPT_g) 627 if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 628 #endif 629 col = in[l]; 630 if (col < row) continue; /* ignore lower triangular block */ 631 if (roworiented) { 632 value = v + k*(stepval+bs)*bs + l*bs; 633 } else { 634 value = v + l*(stepval+bs)*bs + k*bs; 635 } 636 if (!sorted) low = 0; high = nrow; 637 while (high-low > 7) { 638 t = (low+high)/2; 639 if (rp[t] > col) high = t; 640 else low = t; 641 } 642 for (i=low; i<high; i++) { 643 if (rp[i] > col) break; 644 if (rp[i] == col) { 645 bap = ap + bs2*i; 646 if (roworiented) { 647 if (is == ADD_VALUES) { 648 for (ii=0; ii<bs; ii++,value+=stepval) { 649 for (jj=ii; jj<bs2; jj+=bs) { 650 bap[jj] += *value++; 651 } 652 } 653 } else { 654 for (ii=0; ii<bs; ii++,value+=stepval) { 655 for (jj=ii; jj<bs2; jj+=bs) { 656 bap[jj] = *value++; 657 } 658 } 659 } 660 } else { 661 if (is == ADD_VALUES) { 662 for (ii=0; ii<bs; ii++,value+=stepval) { 663 for (jj=0; jj<bs; jj++) { 664 *bap++ += *value++; 665 } 666 } 667 } else { 668 for (ii=0; ii<bs; ii++,value+=stepval) { 669 for (jj=0; jj<bs; jj++) { 670 *bap++ = *value++; 671 } 672 } 673 } 674 } 675 goto noinsert2; 676 } 677 } 678 if (nonew == 1) goto noinsert2; 679 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 680 if (nrow >= rmax) { 681 /* there is no extra room in row, therefore enlarge */ 682 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 683 MatScalar *new_a; 684 685 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 686 687 /* malloc new storage space */ 688 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 689 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 690 new_j = (int*)(new_a + bs2*new_nz); 691 new_i = new_j + new_nz; 692 693 /* copy over old data into new slots */ 694 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 695 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 696 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 697 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 698 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 699 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 700 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 701 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 702 /* free up old matrix storage */ 703 ierr = PetscFree(a->a);CHKERRQ(ierr); 704 if (!a->singlemalloc) { 705 ierr = PetscFree(a->i);CHKERRQ(ierr); 706 ierr = PetscFree(a->j);CHKERRQ(ierr); 707 } 708 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 709 a->singlemalloc = PETSC_TRUE; 710 711 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 712 rmax = imax[row] = imax[row] + CHUNKSIZE; 713 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 714 a->s_maxnz += bs2*CHUNKSIZE; 715 a->reallocs++; 716 a->s_nz++; 717 } 718 N = nrow++ - 1; 719 /* shift up all the later entries in this row */ 720 for (ii=N; ii>=i; ii--) { 721 rp[ii+1] = rp[ii]; 722 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 723 } 724 if (N >= i) { 725 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 726 } 727 rp[i] = col; 728 bap = ap + bs2*i; 729 if (roworiented) { 730 for (ii=0; ii<bs; ii++,value+=stepval) { 731 for (jj=ii; jj<bs2; jj+=bs) { 732 bap[jj] = *value++; 733 } 734 } 735 } else { 736 for (ii=0; ii<bs; ii++,value+=stepval) { 737 for (jj=0; jj<bs; jj++) { 738 *bap++ = *value++; 739 } 740 } 741 } 742 noinsert2:; 743 low = i; 744 } 745 ailen[row] = nrow; 746 } 747 PetscFunctionReturn(0); 748 } 749 750 #undef __FUNCT__ 751 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 752 int MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 753 { 754 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 755 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 756 int m = A->m,*ip,N,*ailen = a->ilen; 757 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 758 MatScalar *aa = a->a,*ap; 759 #if defined(PETSC_HAVE_SPOOLES) 760 PetscTruth flag; 761 #endif 762 763 PetscFunctionBegin; 764 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 765 766 if (m) rmax = ailen[0]; 767 for (i=1; i<mbs; i++) { 768 /* move each row back by the amount of empty slots (fshift) before it*/ 769 fshift += imax[i-1] - ailen[i-1]; 770 rmax = PetscMax(rmax,ailen[i]); 771 if (fshift) { 772 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 773 N = ailen[i]; 774 for (j=0; j<N; j++) { 775 ip[j-fshift] = ip[j]; 776 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 777 } 778 } 779 ai[i] = ai[i-1] + ailen[i-1]; 780 } 781 if (mbs) { 782 fshift += imax[mbs-1] - ailen[mbs-1]; 783 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 784 } 785 /* reset ilen and imax for each row */ 786 for (i=0; i<mbs; i++) { 787 ailen[i] = imax[i] = ai[i+1] - ai[i]; 788 } 789 a->s_nz = ai[mbs]; 790 791 /* diagonals may have moved, reset it */ 792 if (a->diag) { 793 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(int));CHKERRQ(ierr); 794 } 795 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 796 m,A->m,a->bs,fshift*bs2,a->s_nz*bs2); 797 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n", 798 a->reallocs); 799 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 800 a->reallocs = 0; 801 A->info.nz_unneeded = (PetscReal)fshift*bs2; 802 803 #if defined(PETSC_HAVE_SPOOLES) 804 ierr = PetscOptionsHasName(A->prefix,"-mat_sbaij_spooles",&flag);CHKERRQ(ierr); 805 if (flag) { ierr = MatUseSpooles_SeqSBAIJ(A);CHKERRQ(ierr); } 806 #endif 807 808 PetscFunctionReturn(0); 809 } 810 811 /* 812 This function returns an array of flags which indicate the locations of contiguous 813 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 814 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 815 Assume: sizes should be long enough to hold all the values. 816 */ 817 #undef __FUNCT__ 818 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 819 int MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 820 { 821 int i,j,k,row; 822 PetscTruth flg; 823 824 PetscFunctionBegin; 825 for (i=0,j=0; i<n; j++) { 826 row = idx[i]; 827 if (row%bs!=0) { /* Not the begining of a block */ 828 sizes[j] = 1; 829 i++; 830 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 831 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 832 i++; 833 } else { /* Begining of the block, so check if the complete block exists */ 834 flg = PETSC_TRUE; 835 for (k=1; k<bs; k++) { 836 if (row+k != idx[i+k]) { /* break in the block */ 837 flg = PETSC_FALSE; 838 break; 839 } 840 } 841 if (flg == PETSC_TRUE) { /* No break in the bs */ 842 sizes[j] = bs; 843 i+= bs; 844 } else { 845 sizes[j] = 1; 846 i++; 847 } 848 } 849 } 850 *bs_max = j; 851 PetscFunctionReturn(0); 852 } 853 854 #undef __FUNCT__ 855 #define __FUNCT__ "MatZeroRows_SeqSBAIJ" 856 int MatZeroRows_SeqSBAIJ(Mat A,IS is,PetscScalar *diag) 857 { 858 PetscFunctionBegin; 859 SETERRQ(PETSC_ERR_SUP,"No support for this function yet"); 860 } 861 862 /* Only add/insert a(i,j) with i<=j (blocks). 863 Any a(i,j) with i>j input by user is ingored. 864 */ 865 866 #undef __FUNCT__ 867 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 868 int MatSetValues_SeqSBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is) 869 { 870 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 871 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 872 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 873 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 874 int ridx,cidx,bs2=a->bs2,ierr; 875 MatScalar *ap,value,*aa=a->a,*bap; 876 877 PetscFunctionBegin; 878 879 for (k=0; k<m; k++) { /* loop over added rows */ 880 row = im[k]; /* row number */ 881 brow = row/bs; /* block row number */ 882 if (row < 0) continue; 883 #if defined(PETSC_USE_BOPT_g) 884 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m); 885 #endif 886 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 887 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 888 rmax = imax[brow]; /* maximum space allocated for this row */ 889 nrow = ailen[brow]; /* actual length of this row */ 890 low = 0; 891 892 for (l=0; l<n; l++) { /* loop over added columns */ 893 if (in[l] < 0) continue; 894 #if defined(PETSC_USE_BOPT_g) 895 if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m); 896 #endif 897 col = in[l]; 898 bcol = col/bs; /* block col number */ 899 900 if (brow <= bcol){ 901 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 902 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 903 /* element value a(k,l) */ 904 if (roworiented) { 905 value = v[l + k*n]; 906 } else { 907 value = v[k + l*m]; 908 } 909 910 /* move pointer bap to a(k,l) quickly and add/insert value */ 911 if (!sorted) low = 0; high = nrow; 912 while (high-low > 7) { 913 t = (low+high)/2; 914 if (rp[t] > bcol) high = t; 915 else low = t; 916 } 917 for (i=low; i<high; i++) { 918 /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */ 919 if (rp[i] > bcol) break; 920 if (rp[i] == bcol) { 921 bap = ap + bs2*i + bs*cidx + ridx; 922 if (is == ADD_VALUES) *bap += value; 923 else *bap = value; 924 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 925 if (brow == bcol && ridx < cidx){ 926 bap = ap + bs2*i + bs*ridx + cidx; 927 if (is == ADD_VALUES) *bap += value; 928 else *bap = value; 929 } 930 goto noinsert1; 931 } 932 } 933 934 if (nonew == 1) goto noinsert1; 935 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 936 if (nrow >= rmax) { 937 /* there is no extra room in row, therefore enlarge */ 938 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 939 MatScalar *new_a; 940 941 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 942 943 /* Malloc new storage space */ 944 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 945 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 946 new_j = (int*)(new_a + bs2*new_nz); 947 new_i = new_j + new_nz; 948 949 /* copy over old data into new slots */ 950 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 951 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 952 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 953 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 954 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 955 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 956 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 957 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 958 /* free up old matrix storage */ 959 ierr = PetscFree(a->a);CHKERRQ(ierr); 960 if (!a->singlemalloc) { 961 ierr = PetscFree(a->i);CHKERRQ(ierr); 962 ierr = PetscFree(a->j);CHKERRQ(ierr); 963 } 964 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 965 a->singlemalloc = PETSC_TRUE; 966 967 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 968 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 969 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 970 a->s_maxnz += bs2*CHUNKSIZE; 971 a->reallocs++; 972 a->s_nz++; 973 } 974 975 N = nrow++ - 1; 976 /* shift up all the later entries in this row */ 977 for (ii=N; ii>=i; ii--) { 978 rp[ii+1] = rp[ii]; 979 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 980 } 981 if (N>=i) { 982 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 983 } 984 rp[i] = bcol; 985 ap[bs2*i + bs*cidx + ridx] = value; 986 noinsert1:; 987 low = i; 988 /* } */ 989 } 990 } /* end of if .. if.. */ 991 } /* end of loop over added columns */ 992 ailen[brow] = nrow; 993 } /* end of loop over added rows */ 994 995 PetscFunctionReturn(0); 996 } 997 998 extern int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,PetscReal,Mat*); 999 extern int MatCholeskyFactor_SeqSBAIJ(Mat,IS,PetscReal); 1000 extern int MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS*,int); 1001 extern int MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*); 1002 extern int MatGetSubMatrices_SeqSBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**); 1003 extern int MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec); 1004 extern int MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec); 1005 extern int MatScale_SeqSBAIJ(PetscScalar*,Mat); 1006 extern int MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *); 1007 extern int MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*); 1008 extern int MatGetDiagonal_SeqSBAIJ(Mat,Vec); 1009 extern int MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec); 1010 extern int MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *); 1011 extern int MatZeroEntries_SeqSBAIJ(Mat); 1012 extern int MatGetRowMax_SeqSBAIJ(Mat,Vec); 1013 1014 extern int MatSolve_SeqSBAIJ_N(Mat,Vec,Vec); 1015 extern int MatSolve_SeqSBAIJ_1(Mat,Vec,Vec); 1016 extern int MatSolve_SeqSBAIJ_2(Mat,Vec,Vec); 1017 extern int MatSolve_SeqSBAIJ_3(Mat,Vec,Vec); 1018 extern int MatSolve_SeqSBAIJ_4(Mat,Vec,Vec); 1019 extern int MatSolve_SeqSBAIJ_5(Mat,Vec,Vec); 1020 extern int MatSolve_SeqSBAIJ_6(Mat,Vec,Vec); 1021 extern int MatSolve_SeqSBAIJ_7(Mat,Vec,Vec); 1022 extern int MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec); 1023 extern int MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec); 1024 extern int MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec); 1025 extern int MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec); 1026 extern int MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec); 1027 extern int MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec); 1028 extern int MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec); 1029 1030 extern int MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec); 1031 extern int MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec); 1032 extern int MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec); 1033 extern int MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 1034 extern int MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec); 1035 extern int MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec); 1036 extern int MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec); 1037 extern int MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec); 1038 1039 extern int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*); 1040 extern int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*); 1041 extern int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*); 1042 extern int MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*); 1043 extern int MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*); 1044 extern int MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*); 1045 extern int MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*); 1046 extern int MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*); 1047 1048 extern int MatMult_SeqSBAIJ_1(Mat,Vec,Vec); 1049 extern int MatMult_SeqSBAIJ_2(Mat,Vec,Vec); 1050 extern int MatMult_SeqSBAIJ_3(Mat,Vec,Vec); 1051 extern int MatMult_SeqSBAIJ_4(Mat,Vec,Vec); 1052 extern int MatMult_SeqSBAIJ_5(Mat,Vec,Vec); 1053 extern int MatMult_SeqSBAIJ_6(Mat,Vec,Vec); 1054 extern int MatMult_SeqSBAIJ_7(Mat,Vec,Vec); 1055 extern int MatMult_SeqSBAIJ_N(Mat,Vec,Vec); 1056 1057 extern int MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec); 1058 extern int MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec); 1059 extern int MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec); 1060 extern int MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec); 1061 extern int MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec); 1062 extern int MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec); 1063 extern int MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec); 1064 extern int MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec); 1065 1066 #ifdef HAVE_MatICCFactor 1067 /* modefied from MatILUFactor_SeqSBAIJ, needs further work! */ 1068 #undef __FUNCT__ 1069 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 1070 int MatICCFactor_SeqSBAIJ(Mat inA,IS row,PetscReal fill,int level) 1071 { 1072 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 1073 Mat outA; 1074 int ierr; 1075 PetscTruth row_identity,col_identity; 1076 1077 PetscFunctionBegin; 1078 /* 1079 if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1080 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1081 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1082 if (!row_identity || !col_identity) { 1083 SETERRQ(1,"Row and column permutations must be identity for in-place ICC"); 1084 } 1085 */ 1086 1087 outA = inA; 1088 inA->factor = FACTOR_CHOLESKY; 1089 1090 if (!a->diag) { 1091 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 1092 } 1093 /* 1094 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1095 for ILU(0) factorization with natural ordering 1096 */ 1097 switch (a->bs) { 1098 case 1: 1099 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1100 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1101 PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n"); 1102 case 2: 1103 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 1104 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1105 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1106 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1107 break; 1108 case 3: 1109 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 1110 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1111 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1112 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n"); 1113 break; 1114 case 4: 1115 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 1116 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1117 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1118 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1119 break; 1120 case 5: 1121 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 1122 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1123 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1124 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1125 break; 1126 case 6: 1127 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 1128 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1129 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1130 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1131 break; 1132 case 7: 1133 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 1134 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1135 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1136 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1137 break; 1138 default: 1139 a->row = row; 1140 a->icol = col; 1141 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1142 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1143 1144 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1145 ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr); 1146 PetscLogObjectParent(inA,a->icol); 1147 1148 if (!a->solve_work) { 1149 ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1150 PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar)); 1151 } 1152 } 1153 1154 ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr); 1155 1156 PetscFunctionReturn(0); 1157 } 1158 #endif 1159 1160 #undef __FUNCT__ 1161 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ" 1162 int MatPrintHelp_SeqSBAIJ(Mat A) 1163 { 1164 static PetscTruth called = PETSC_FALSE; 1165 MPI_Comm comm = A->comm; 1166 int ierr; 1167 1168 PetscFunctionBegin; 1169 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1170 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1171 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1172 PetscFunctionReturn(0); 1173 } 1174 1175 EXTERN_C_BEGIN 1176 #undef __FUNCT__ 1177 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1178 int MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices) 1179 { 1180 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1181 int i,nz,n; 1182 1183 PetscFunctionBegin; 1184 nz = baij->s_maxnz; 1185 n = mat->n; 1186 for (i=0; i<nz; i++) { 1187 baij->j[i] = indices[i]; 1188 } 1189 baij->s_nz = nz; 1190 for (i=0; i<n; i++) { 1191 baij->ilen[i] = baij->imax[i]; 1192 } 1193 1194 PetscFunctionReturn(0); 1195 } 1196 EXTERN_C_END 1197 1198 #undef __FUNCT__ 1199 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1200 /*@ 1201 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1202 in the matrix. 1203 1204 Input Parameters: 1205 + mat - the SeqSBAIJ matrix 1206 - indices - the column indices 1207 1208 Level: advanced 1209 1210 Notes: 1211 This can be called if you have precomputed the nonzero structure of the 1212 matrix and want to provide it to the matrix object to improve the performance 1213 of the MatSetValues() operation. 1214 1215 You MUST have set the correct numbers of nonzeros per row in the call to 1216 MatCreateSeqSBAIJ(). 1217 1218 MUST be called before any calls to MatSetValues() 1219 1220 .seealso: MatCreateSeqSBAIJ 1221 @*/ 1222 int MatSeqSBAIJSetColumnIndices(Mat mat,int *indices) 1223 { 1224 int ierr,(*f)(Mat,int *); 1225 1226 PetscFunctionBegin; 1227 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1228 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1229 if (f) { 1230 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1231 } else { 1232 SETERRQ(1,"Wrong type of matrix to set column indices"); 1233 } 1234 PetscFunctionReturn(0); 1235 } 1236 1237 #undef __FUNCT__ 1238 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1239 int MatSetUpPreallocation_SeqSBAIJ(Mat A) 1240 { 1241 int ierr; 1242 1243 PetscFunctionBegin; 1244 ierr = MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1245 PetscFunctionReturn(0); 1246 } 1247 1248 #include "petscblaslapack.h" 1249 #undef __FUNCT__ 1250 #define __FUNCT__ "MatAXPY_SeqSBAIJ" 1251 int MatAXPY_SeqSBAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str) 1252 { 1253 int ierr,one=1; 1254 Mat_SeqSBAIJ *x = (Mat_SeqSBAIJ *)X->data,*y = (Mat_SeqSBAIJ *)Y->data; 1255 1256 PetscFunctionBegin; 1257 if (str == SAME_NONZERO_PATTERN) { 1258 BLaxpy_(&x->s_nz,a,x->a,&one,y->a,&one); 1259 } else { 1260 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1261 } 1262 PetscFunctionReturn(0); 1263 } 1264 1265 /* -------------------------------------------------------------------*/ 1266 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1267 MatGetRow_SeqSBAIJ, 1268 MatRestoreRow_SeqSBAIJ, 1269 MatMult_SeqSBAIJ_N, 1270 MatMultAdd_SeqSBAIJ_N, 1271 MatMultTranspose_SeqSBAIJ, 1272 MatMultTransposeAdd_SeqSBAIJ, 1273 MatSolve_SeqSBAIJ_N, 1274 0, 1275 0, 1276 0, 1277 0, 1278 MatCholeskyFactor_SeqSBAIJ, 1279 MatRelax_SeqSBAIJ, 1280 MatTranspose_SeqSBAIJ, 1281 MatGetInfo_SeqSBAIJ, 1282 MatEqual_SeqSBAIJ, 1283 MatGetDiagonal_SeqSBAIJ, 1284 MatDiagonalScale_SeqSBAIJ, 1285 MatNorm_SeqSBAIJ, 1286 0, 1287 MatAssemblyEnd_SeqSBAIJ, 1288 0, 1289 MatSetOption_SeqSBAIJ, 1290 MatZeroEntries_SeqSBAIJ, 1291 MatZeroRows_SeqSBAIJ, 1292 0, 1293 0, 1294 MatCholeskyFactorSymbolic_SeqSBAIJ, 1295 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1296 MatSetUpPreallocation_SeqSBAIJ, 1297 0, 1298 MatICCFactorSymbolic_SeqSBAIJ, 1299 0, 1300 0, 1301 MatDuplicate_SeqSBAIJ, 1302 0, 1303 0, 1304 0, 1305 0, 1306 MatAXPY_SeqSBAIJ, 1307 MatGetSubMatrices_SeqSBAIJ, 1308 MatIncreaseOverlap_SeqSBAIJ, 1309 MatGetValues_SeqSBAIJ, 1310 0, 1311 MatPrintHelp_SeqSBAIJ, 1312 MatScale_SeqSBAIJ, 1313 0, 1314 0, 1315 0, 1316 MatGetBlockSize_SeqSBAIJ, 1317 MatGetRowIJ_SeqSBAIJ, 1318 MatRestoreRowIJ_SeqSBAIJ, 1319 0, 1320 0, 1321 0, 1322 0, 1323 0, 1324 0, 1325 MatSetValuesBlocked_SeqSBAIJ, 1326 MatGetSubMatrix_SeqSBAIJ, 1327 0, 1328 0, 1329 MatGetPetscMaps_Petsc, 1330 0, 1331 0, 1332 0, 1333 0, 1334 0, 1335 0, 1336 MatGetRowMax_SeqSBAIJ}; 1337 1338 EXTERN_C_BEGIN 1339 #undef __FUNCT__ 1340 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1341 int MatStoreValues_SeqSBAIJ(Mat mat) 1342 { 1343 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1344 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1345 int ierr; 1346 1347 PetscFunctionBegin; 1348 if (aij->nonew != 1) { 1349 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1350 } 1351 1352 /* allocate space for values if not already there */ 1353 if (!aij->saved_values) { 1354 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1355 } 1356 1357 /* copy values over */ 1358 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1359 PetscFunctionReturn(0); 1360 } 1361 EXTERN_C_END 1362 1363 EXTERN_C_BEGIN 1364 #undef __FUNCT__ 1365 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1366 int MatRetrieveValues_SeqSBAIJ(Mat mat) 1367 { 1368 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1369 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 1370 1371 PetscFunctionBegin; 1372 if (aij->nonew != 1) { 1373 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1374 } 1375 if (!aij->saved_values) { 1376 SETERRQ(1,"Must call MatStoreValues(A);first"); 1377 } 1378 1379 /* copy values over */ 1380 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1381 PetscFunctionReturn(0); 1382 } 1383 EXTERN_C_END 1384 1385 EXTERN_C_BEGIN 1386 #undef __FUNCT__ 1387 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1388 int MatCreate_SeqSBAIJ(Mat B) 1389 { 1390 Mat_SeqSBAIJ *b; 1391 int ierr,size; 1392 1393 PetscFunctionBegin; 1394 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1395 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1396 B->m = B->M = PetscMax(B->m,B->M); 1397 B->n = B->N = PetscMax(B->n,B->N); 1398 1399 ierr = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1400 B->data = (void*)b; 1401 ierr = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr); 1402 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1403 B->ops->destroy = MatDestroy_SeqSBAIJ; 1404 B->ops->view = MatView_SeqSBAIJ; 1405 B->factor = 0; 1406 B->lupivotthreshold = 1.0; 1407 B->mapping = 0; 1408 b->row = 0; 1409 b->icol = 0; 1410 b->reallocs = 0; 1411 b->saved_values = 0; 1412 1413 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1414 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1415 1416 b->sorted = PETSC_FALSE; 1417 b->roworiented = PETSC_TRUE; 1418 b->nonew = 0; 1419 b->diag = 0; 1420 b->solve_work = 0; 1421 b->mult_work = 0; 1422 B->spptr = 0; 1423 b->keepzeroedrows = PETSC_FALSE; 1424 1425 b->inew = 0; 1426 b->jnew = 0; 1427 b->anew = 0; 1428 b->a2anew = 0; 1429 b->permute = PETSC_FALSE; 1430 1431 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1432 "MatStoreValues_SeqSBAIJ", 1433 (void*)MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1434 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1435 "MatRetrieveValues_SeqSBAIJ", 1436 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1437 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1438 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1439 (void*)MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1440 PetscFunctionReturn(0); 1441 } 1442 EXTERN_C_END 1443 1444 #undef __FUNCT__ 1445 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1446 /*@C 1447 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1448 compressed row) format. For good matrix assembly performance the 1449 user should preallocate the matrix storage by setting the parameter nz 1450 (or the array nnz). By setting these parameters accurately, performance 1451 during matrix assembly can be increased by more than a factor of 50. 1452 1453 Collective on Mat 1454 1455 Input Parameters: 1456 + A - the symmetric matrix 1457 . bs - size of block 1458 . nz - number of block nonzeros per block row (same for all rows) 1459 - nnz - array containing the number of block nonzeros in the upper triangular plus 1460 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1461 1462 Options Database Keys: 1463 . -mat_no_unroll - uses code that does not unroll the loops in the 1464 block calculations (much slower) 1465 . -mat_block_size - size of the blocks to use 1466 1467 Level: intermediate 1468 1469 Notes: 1470 Specify the preallocated storage with either nz or nnz (not both). 1471 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1472 allocation. For additional details, see the users manual chapter on 1473 matrices. 1474 1475 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1476 @*/ 1477 int MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz) 1478 { 1479 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1480 int i,len,ierr,mbs,bs2; 1481 PetscTruth flg; 1482 int s_nz; 1483 1484 PetscFunctionBegin; 1485 B->preallocated = PETSC_TRUE; 1486 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1487 mbs = B->m/bs; 1488 bs2 = bs*bs; 1489 1490 if (mbs*bs != B->m) { 1491 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1492 } 1493 1494 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1495 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1496 if (nnz) { 1497 for (i=0; i<mbs; i++) { 1498 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1499 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],mbs); 1500 } 1501 } 1502 1503 ierr = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1504 if (!flg) { 1505 switch (bs) { 1506 case 1: 1507 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1508 B->ops->solve = MatSolve_SeqSBAIJ_1; 1509 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_1; 1510 B->ops->mult = MatMult_SeqSBAIJ_1; 1511 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1512 break; 1513 case 2: 1514 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1515 B->ops->solve = MatSolve_SeqSBAIJ_2; 1516 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_2; 1517 B->ops->mult = MatMult_SeqSBAIJ_2; 1518 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1519 break; 1520 case 3: 1521 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1522 B->ops->solve = MatSolve_SeqSBAIJ_3; 1523 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_3; 1524 B->ops->mult = MatMult_SeqSBAIJ_3; 1525 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1526 break; 1527 case 4: 1528 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1529 B->ops->solve = MatSolve_SeqSBAIJ_4; 1530 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_4; 1531 B->ops->mult = MatMult_SeqSBAIJ_4; 1532 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1533 break; 1534 case 5: 1535 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1536 B->ops->solve = MatSolve_SeqSBAIJ_5; 1537 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_5; 1538 B->ops->mult = MatMult_SeqSBAIJ_5; 1539 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1540 break; 1541 case 6: 1542 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1543 B->ops->solve = MatSolve_SeqSBAIJ_6; 1544 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_6; 1545 B->ops->mult = MatMult_SeqSBAIJ_6; 1546 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1547 break; 1548 case 7: 1549 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1550 B->ops->solve = MatSolve_SeqSBAIJ_7; 1551 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_7; 1552 B->ops->mult = MatMult_SeqSBAIJ_7; 1553 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1554 break; 1555 } 1556 } 1557 1558 b->mbs = mbs; 1559 b->nbs = mbs; 1560 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 1561 if (!nnz) { 1562 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1563 else if (nz <= 0) nz = 1; 1564 for (i=0; i<mbs; i++) { 1565 b->imax[i] = nz; 1566 } 1567 nz = nz*mbs; /* total nz */ 1568 } else { 1569 nz = 0; 1570 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1571 } 1572 /* s_nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1573 s_nz = nz; 1574 1575 /* allocate the matrix space */ 1576 len = s_nz*sizeof(int) + s_nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 1577 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1578 ierr = PetscMemzero(b->a,s_nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1579 b->j = (int*)(b->a + s_nz*bs2); 1580 ierr = PetscMemzero(b->j,s_nz*sizeof(int));CHKERRQ(ierr); 1581 b->i = b->j + s_nz; 1582 b->singlemalloc = PETSC_TRUE; 1583 1584 /* pointer to beginning of each row */ 1585 b->i[0] = 0; 1586 for (i=1; i<mbs+1; i++) { 1587 b->i[i] = b->i[i-1] + b->imax[i-1]; 1588 } 1589 1590 /* b->ilen will count nonzeros in each block row so far. */ 1591 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 1592 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1593 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1594 1595 b->bs = bs; 1596 b->bs2 = bs2; 1597 b->s_nz = 0; 1598 b->s_maxnz = s_nz*bs2; 1599 1600 b->inew = 0; 1601 b->jnew = 0; 1602 b->anew = 0; 1603 b->a2anew = 0; 1604 b->permute = PETSC_FALSE; 1605 PetscFunctionReturn(0); 1606 } 1607 1608 1609 #undef __FUNCT__ 1610 #define __FUNCT__ "MatCreateSeqSBAIJ" 1611 /*@C 1612 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1613 compressed row) format. For good matrix assembly performance the 1614 user should preallocate the matrix storage by setting the parameter nz 1615 (or the array nnz). By setting these parameters accurately, performance 1616 during matrix assembly can be increased by more than a factor of 50. 1617 1618 Collective on MPI_Comm 1619 1620 Input Parameters: 1621 + comm - MPI communicator, set to PETSC_COMM_SELF 1622 . bs - size of block 1623 . m - number of rows, or number of columns 1624 . nz - number of block nonzeros per block row (same for all rows) 1625 - nnz - array containing the number of block nonzeros in the upper triangular plus 1626 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1627 1628 Output Parameter: 1629 . A - the symmetric matrix 1630 1631 Options Database Keys: 1632 . -mat_no_unroll - uses code that does not unroll the loops in the 1633 block calculations (much slower) 1634 . -mat_block_size - size of the blocks to use 1635 1636 Level: intermediate 1637 1638 Notes: 1639 1640 Specify the preallocated storage with either nz or nnz (not both). 1641 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1642 allocation. For additional details, see the users manual chapter on 1643 matrices. 1644 1645 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1646 @*/ 1647 int MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A) 1648 { 1649 int ierr; 1650 1651 PetscFunctionBegin; 1652 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1653 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1654 ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1655 PetscFunctionReturn(0); 1656 } 1657 1658 #undef __FUNCT__ 1659 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1660 int MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1661 { 1662 Mat C; 1663 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1664 int i,len,mbs = a->mbs,nz = a->s_nz,bs2 =a->bs2,ierr; 1665 1666 PetscFunctionBegin; 1667 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1668 1669 *B = 0; 1670 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1671 ierr = MatSetType(C,MATSEQSBAIJ);CHKERRQ(ierr); 1672 c = (Mat_SeqSBAIJ*)C->data; 1673 1674 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1675 C->preallocated = PETSC_TRUE; 1676 C->factor = A->factor; 1677 c->row = 0; 1678 c->icol = 0; 1679 c->saved_values = 0; 1680 c->keepzeroedrows = a->keepzeroedrows; 1681 C->assembled = PETSC_TRUE; 1682 1683 c->bs = a->bs; 1684 c->bs2 = a->bs2; 1685 c->mbs = a->mbs; 1686 c->nbs = a->nbs; 1687 1688 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1689 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1690 for (i=0; i<mbs; i++) { 1691 c->imax[i] = a->imax[i]; 1692 c->ilen[i] = a->ilen[i]; 1693 } 1694 1695 /* allocate the matrix space */ 1696 c->singlemalloc = PETSC_TRUE; 1697 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1698 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1699 c->j = (int*)(c->a + nz*bs2); 1700 c->i = c->j + nz; 1701 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1702 if (mbs > 0) { 1703 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1704 if (cpvalues == MAT_COPY_VALUES) { 1705 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1706 } else { 1707 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1708 } 1709 } 1710 1711 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1712 c->sorted = a->sorted; 1713 c->roworiented = a->roworiented; 1714 c->nonew = a->nonew; 1715 1716 if (a->diag) { 1717 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1718 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1719 for (i=0; i<mbs; i++) { 1720 c->diag[i] = a->diag[i]; 1721 } 1722 } else c->diag = 0; 1723 c->s_nz = a->s_nz; 1724 c->s_maxnz = a->s_maxnz; 1725 c->solve_work = 0; 1726 C->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1727 c->mult_work = 0; 1728 *B = C; 1729 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1730 PetscFunctionReturn(0); 1731 } 1732 1733 EXTERN_C_BEGIN 1734 #undef __FUNCT__ 1735 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1736 int MatLoad_SeqSBAIJ(PetscViewer viewer,MatType type,Mat *A) 1737 { 1738 Mat_SeqSBAIJ *a; 1739 Mat B; 1740 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1741 int *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 1742 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1743 int *masked,nmask,tmp,bs2,ishift; 1744 PetscScalar *aa; 1745 MPI_Comm comm = ((PetscObject)viewer)->comm; 1746 1747 PetscFunctionBegin; 1748 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1749 bs2 = bs*bs; 1750 1751 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1752 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1753 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1754 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1755 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1756 M = header[1]; N = header[2]; nz = header[3]; 1757 1758 if (header[3] < 0) { 1759 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1760 } 1761 1762 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1763 1764 /* 1765 This code adds extra rows to make sure the number of rows is 1766 divisible by the blocksize 1767 */ 1768 mbs = M/bs; 1769 extra_rows = bs - M + bs*(mbs); 1770 if (extra_rows == bs) extra_rows = 0; 1771 else mbs++; 1772 if (extra_rows) { 1773 PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n"); 1774 } 1775 1776 /* read in row lengths */ 1777 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1778 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1779 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1780 1781 /* read in column indices */ 1782 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1783 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1784 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1785 1786 /* loop over row lengths determining block row lengths */ 1787 ierr = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr); 1788 ierr = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1789 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1790 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1791 masked = mask + mbs; 1792 rowcount = 0; nzcount = 0; 1793 for (i=0; i<mbs; i++) { 1794 nmask = 0; 1795 for (j=0; j<bs; j++) { 1796 kmax = rowlengths[rowcount]; 1797 for (k=0; k<kmax; k++) { 1798 tmp = jj[nzcount++]/bs; /* block col. index */ 1799 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1800 } 1801 rowcount++; 1802 } 1803 s_browlengths[i] += nmask; 1804 1805 /* zero out the mask elements we set */ 1806 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1807 } 1808 1809 /* create our matrix */ 1810 ierr = MatCreateSeqSBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,s_browlengths,A);CHKERRQ(ierr); 1811 B = *A; 1812 a = (Mat_SeqSBAIJ*)B->data; 1813 1814 /* set matrix "i" values */ 1815 a->i[0] = 0; 1816 for (i=1; i<= mbs; i++) { 1817 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1818 a->ilen[i-1] = s_browlengths[i-1]; 1819 } 1820 a->s_nz = a->i[mbs]; 1821 1822 /* read in nonzero values */ 1823 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1824 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1825 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1826 1827 /* set "a" and "j" values into matrix */ 1828 nzcount = 0; jcount = 0; 1829 for (i=0; i<mbs; i++) { 1830 nzcountb = nzcount; 1831 nmask = 0; 1832 for (j=0; j<bs; j++) { 1833 kmax = rowlengths[i*bs+j]; 1834 for (k=0; k<kmax; k++) { 1835 tmp = jj[nzcount++]/bs; /* block col. index */ 1836 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1837 } 1838 } 1839 /* sort the masked values */ 1840 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1841 1842 /* set "j" values into matrix */ 1843 maskcount = 1; 1844 for (j=0; j<nmask; j++) { 1845 a->j[jcount++] = masked[j]; 1846 mask[masked[j]] = maskcount++; 1847 } 1848 1849 /* set "a" values into matrix */ 1850 ishift = bs2*a->i[i]; 1851 for (j=0; j<bs; j++) { 1852 kmax = rowlengths[i*bs+j]; 1853 for (k=0; k<kmax; k++) { 1854 tmp = jj[nzcountb]/bs ; /* block col. index */ 1855 if (tmp >= i){ 1856 block = mask[tmp] - 1; 1857 point = jj[nzcountb] - bs*tmp; 1858 idx = ishift + bs2*block + j + bs*point; 1859 a->a[idx] = aa[nzcountb]; 1860 } 1861 nzcountb++; 1862 } 1863 } 1864 /* zero out the mask elements we set */ 1865 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1866 } 1867 if (jcount != a->s_nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1868 1869 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1870 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1871 ierr = PetscFree(aa);CHKERRQ(ierr); 1872 ierr = PetscFree(jj);CHKERRQ(ierr); 1873 ierr = PetscFree(mask);CHKERRQ(ierr); 1874 1875 B->assembled = PETSC_TRUE; 1876 ierr = MatView_Private(B);CHKERRQ(ierr); 1877 PetscFunctionReturn(0); 1878 } 1879 EXTERN_C_END 1880 1881 #undef __FUNCT__ 1882 #define __FUNCT__ "MatRelax_SeqSBAIJ" 1883 int MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 1884 { 1885 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1886 MatScalar *aa=a->a,*v,*v1; 1887 PetscScalar *x,*b,*t,sum,d; 1888 int m=a->mbs,bs=a->bs,*ai=a->i,*aj=a->j,ierr; 1889 int nz,nz1,*vj,*vj1,i; 1890 1891 PetscFunctionBegin; 1892 its = its*lits; 1893 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 1894 1895 if (bs > 1) 1896 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 1897 1898 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1899 if (xx != bb) { 1900 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1901 } else { 1902 b = x; 1903 } 1904 1905 ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr); 1906 1907 if (flag & SOR_ZERO_INITIAL_GUESS) { 1908 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1909 for (i=0; i<m; i++) 1910 t[i] = b[i]; 1911 1912 for (i=0; i<m; i++){ 1913 d = *(aa + ai[i]); /* diag[i] */ 1914 v = aa + ai[i] + 1; 1915 vj = aj + ai[i] + 1; 1916 nz = ai[i+1] - ai[i] - 1; 1917 x[i] = omega*t[i]/d; 1918 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 1919 PetscLogFlops(2*nz-1); 1920 } 1921 } 1922 1923 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 1924 for (i=0; i<m; i++) 1925 t[i] = b[i]; 1926 1927 for (i=0; i<m-1; i++){ /* update rhs */ 1928 v = aa + ai[i] + 1; 1929 vj = aj + ai[i] + 1; 1930 nz = ai[i+1] - ai[i] - 1; 1931 while (nz--) t[*vj++] -= x[i]*(*v++); 1932 PetscLogFlops(2*nz-1); 1933 } 1934 for (i=m-1; i>=0; i--){ 1935 d = *(aa + ai[i]); 1936 v = aa + ai[i] + 1; 1937 vj = aj + ai[i] + 1; 1938 nz = ai[i+1] - ai[i] - 1; 1939 sum = t[i]; 1940 while (nz--) sum -= x[*vj++]*(*v++); 1941 PetscLogFlops(2*nz-1); 1942 x[i] = (1-omega)*x[i] + omega*sum/d; 1943 } 1944 } 1945 its--; 1946 } 1947 1948 while (its--) { 1949 /* 1950 forward sweep: 1951 for i=0,...,m-1: 1952 sum[i] = (b[i] - U(i,:)x )/d[i]; 1953 x[i] = (1-omega)x[i] + omega*sum[i]; 1954 b = b - x[i]*U^T(i,:); 1955 1956 */ 1957 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1958 for (i=0; i<m; i++) 1959 t[i] = b[i]; 1960 1961 for (i=0; i<m; i++){ 1962 d = *(aa + ai[i]); /* diag[i] */ 1963 v = aa + ai[i] + 1; v1=v; 1964 vj = aj + ai[i] + 1; vj1=vj; 1965 nz = ai[i+1] - ai[i] - 1; nz1=nz; 1966 sum = t[i]; 1967 while (nz1--) sum -= (*v1++)*x[*vj1++]; 1968 x[i] = (1-omega)*x[i] + omega*sum/d; 1969 while (nz--) t[*vj++] -= x[i]*(*v++); 1970 PetscLogFlops(4*nz-2); 1971 } 1972 } 1973 1974 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 1975 /* 1976 backward sweep: 1977 b = b - x[i]*U^T(i,:), i=0,...,n-2 1978 for i=m-1,...,0: 1979 sum[i] = (b[i] - U(i,:)x )/d[i]; 1980 x[i] = (1-omega)x[i] + omega*sum[i]; 1981 */ 1982 for (i=0; i<m; i++) 1983 t[i] = b[i]; 1984 1985 for (i=0; i<m-1; i++){ /* update rhs */ 1986 v = aa + ai[i] + 1; 1987 vj = aj + ai[i] + 1; 1988 nz = ai[i+1] - ai[i] - 1; 1989 while (nz--) t[*vj++] -= x[i]*(*v++); 1990 PetscLogFlops(2*nz-1); 1991 } 1992 for (i=m-1; i>=0; i--){ 1993 d = *(aa + ai[i]); 1994 v = aa + ai[i] + 1; 1995 vj = aj + ai[i] + 1; 1996 nz = ai[i+1] - ai[i] - 1; 1997 sum = t[i]; 1998 while (nz--) sum -= x[*vj++]*(*v++); 1999 PetscLogFlops(2*nz-1); 2000 x[i] = (1-omega)*x[i] + omega*sum/d; 2001 } 2002 } 2003 } 2004 2005 ierr = PetscFree(t); CHKERRQ(ierr); 2006 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2007 if (bb != xx) { 2008 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2009 } 2010 2011 PetscFunctionReturn(0); 2012 } 2013 2014 2015 2016 2017 2018 2019