1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the SBAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 8 #include "src/inline/spops.h" 9 #include "src/mat/impls/sbaij/seq/sbaij.h" 10 11 #define CHUNKSIZE 10 12 13 /* 14 Checks for missing diagonals 15 */ 16 #undef __FUNCT__ 17 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 18 PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A) 19 { 20 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 21 PetscErrorCode ierr; 22 PetscInt *diag,*jj = a->j,i; 23 24 PetscFunctionBegin; 25 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 26 diag = a->diag; 27 for (i=0; i<a->mbs; i++) { 28 if (jj[diag[i]] != i) SETERRQ1(PETSC_ERR_ARG_CORRUPT,"Matrix is missing diagonal number %D",i); 29 } 30 PetscFunctionReturn(0); 31 } 32 33 #undef __FUNCT__ 34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 35 PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A) 36 { 37 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 38 PetscErrorCode ierr; 39 PetscInt i; 40 41 PetscFunctionBegin; 42 if (!a->diag) { 43 ierr = PetscMalloc(a->mbs*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 44 } 45 for (i=0; i<a->mbs; i++) a->diag[i] = a->i[i]; 46 PetscFunctionReturn(0); 47 } 48 49 #undef __FUNCT__ 50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 51 static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 52 { 53 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 54 PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap.bs; 55 PetscErrorCode ierr; 56 57 PetscFunctionBegin; 58 *nn = n; 59 if (!ia) PetscFunctionReturn(0); 60 if (!blockcompressed) { 61 /* malloc & create the natural set of indices */ 62 ierr = PetscMalloc2((n+1)*bs,PetscInt,ia,nz*bs,PetscInt,ja);CHKERRQ(ierr); 63 for (i=0; i<n+1; i++) { 64 for (j=0; j<bs; j++) { 65 *ia[i*bs+j] = a->i[i]*bs+j+oshift; 66 } 67 } 68 for (i=0; i<nz; i++) { 69 for (j=0; j<bs; j++) { 70 *ja[i*bs+j] = a->j[i]*bs+j+oshift; 71 } 72 } 73 } else { /* blockcompressed */ 74 if (oshift == 1) { 75 /* temporarily add 1 to i and j indices */ 76 for (i=0; i<nz; i++) a->j[i]++; 77 for (i=0; i<n+1; i++) a->i[i]++; 78 } 79 *ia = a->i; *ja = a->j; 80 } 81 82 PetscFunctionReturn(0); 83 } 84 85 #undef __FUNCT__ 86 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 87 static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 88 { 89 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 90 PetscInt i,n = a->mbs,nz = a->i[n]; 91 PetscErrorCode ierr; 92 93 PetscFunctionBegin; 94 if (!ia) PetscFunctionReturn(0); 95 96 if (!blockcompressed) { 97 ierr = PetscFree2(*ia,*ja);CHKERRQ(ierr); 98 } else if (oshift == 1) { /* blockcompressed */ 99 for (i=0; i<nz; i++) a->j[i]--; 100 for (i=0; i<n+1; i++) a->i[i]--; 101 } 102 103 PetscFunctionReturn(0); 104 } 105 106 #undef __FUNCT__ 107 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 108 PetscErrorCode MatDestroy_SeqSBAIJ(Mat A) 109 { 110 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 111 PetscErrorCode ierr; 112 113 PetscFunctionBegin; 114 #if defined(PETSC_USE_LOG) 115 PetscLogObjectState((PetscObject)A,"Rows=%D, NZ=%D",A->rmap.N,a->nz); 116 #endif 117 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 118 if (a->row) {ierr = ISDestroy(a->row);CHKERRQ(ierr);} 119 if (a->col){ierr = ISDestroy(a->col);CHKERRQ(ierr);} 120 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 121 ierr = PetscFree(a->diag);CHKERRQ(ierr); 122 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 123 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 124 ierr = PetscFree(a->relax_work);CHKERRQ(ierr); 125 ierr = PetscFree(a->solves_work);CHKERRQ(ierr); 126 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 127 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 128 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 129 130 ierr = PetscFree(a->inew);CHKERRQ(ierr); 131 ierr = PetscFree(a);CHKERRQ(ierr); 132 133 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 134 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 135 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 136 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 137 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 138 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqbaij_C","",PETSC_NULL);CHKERRQ(ierr); 139 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 140 PetscFunctionReturn(0); 141 } 142 143 #undef __FUNCT__ 144 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 145 PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op) 146 { 147 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 148 PetscErrorCode ierr; 149 150 PetscFunctionBegin; 151 switch (op) { 152 case MAT_ROW_ORIENTED: 153 a->roworiented = PETSC_TRUE; 154 break; 155 case MAT_COLUMN_ORIENTED: 156 a->roworiented = PETSC_FALSE; 157 break; 158 case MAT_COLUMNS_SORTED: 159 a->sorted = PETSC_TRUE; 160 break; 161 case MAT_COLUMNS_UNSORTED: 162 a->sorted = PETSC_FALSE; 163 break; 164 case MAT_KEEP_ZEROED_ROWS: 165 a->keepzeroedrows = PETSC_TRUE; 166 break; 167 case MAT_NO_NEW_NONZERO_LOCATIONS: 168 a->nonew = 1; 169 break; 170 case MAT_NEW_NONZERO_LOCATION_ERR: 171 a->nonew = -1; 172 break; 173 case MAT_NEW_NONZERO_ALLOCATION_ERR: 174 a->nonew = -2; 175 break; 176 case MAT_YES_NEW_NONZERO_LOCATIONS: 177 a->nonew = 0; 178 break; 179 case MAT_ROWS_SORTED: 180 case MAT_ROWS_UNSORTED: 181 case MAT_YES_NEW_DIAGONALS: 182 case MAT_IGNORE_OFF_PROC_ENTRIES: 183 case MAT_USE_HASH_TABLE: 184 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 185 break; 186 case MAT_NO_NEW_DIAGONALS: 187 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 188 case MAT_NOT_SYMMETRIC: 189 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 190 case MAT_HERMITIAN: 191 SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 192 case MAT_SYMMETRIC: 193 case MAT_STRUCTURALLY_SYMMETRIC: 194 case MAT_NOT_HERMITIAN: 195 case MAT_SYMMETRY_ETERNAL: 196 case MAT_NOT_SYMMETRY_ETERNAL: 197 ierr = PetscInfo1(A,"Option %s not relevent\n",MatOptions[op]);CHKERRQ(ierr); 198 break; 199 case MAT_IGNORE_LOWER_TRIANGULAR: 200 a->ignore_ltriangular = PETSC_TRUE; 201 break; 202 case MAT_ERROR_LOWER_TRIANGULAR: 203 a->ignore_ltriangular = PETSC_FALSE; 204 break; 205 case MAT_GETROW_UPPERTRIANGULAR: 206 a->getrow_utriangular = PETSC_TRUE; 207 break; 208 default: 209 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 210 } 211 PetscFunctionReturn(0); 212 } 213 214 #undef __FUNCT__ 215 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 216 PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **v) 217 { 218 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 219 PetscErrorCode ierr; 220 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2; 221 MatScalar *aa,*aa_i; 222 PetscScalar *v_i; 223 224 PetscFunctionBegin; 225 if (A && !a->getrow_utriangular) SETERRQ(PETSC_ERR_SUP,"MatGetRow is not supported for SBAIJ matrix format. Getting the upper triangular part of row, run with -mat_getrow_uppertriangular, call MatSetOption(mat,MAT_GETROW_UPPERTRIANGULAR) or MatGetRowUpperTriangular()"); 226 /* Get the upper triangular part of the row */ 227 bs = A->rmap.bs; 228 ai = a->i; 229 aj = a->j; 230 aa = a->a; 231 bs2 = a->bs2; 232 233 if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %D out of range", row); 234 235 bn = row/bs; /* Block number */ 236 bp = row % bs; /* Block position */ 237 M = ai[bn+1] - ai[bn]; 238 *ncols = bs*M; 239 240 if (v) { 241 *v = 0; 242 if (*ncols) { 243 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 244 for (i=0; i<M; i++) { /* for each block in the block row */ 245 v_i = *v + i*bs; 246 aa_i = aa + bs2*(ai[bn] + i); 247 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 248 } 249 } 250 } 251 252 if (cols) { 253 *cols = 0; 254 if (*ncols) { 255 ierr = PetscMalloc((*ncols+row)*sizeof(PetscInt),cols);CHKERRQ(ierr); 256 for (i=0; i<M; i++) { /* for each block in the block row */ 257 cols_i = *cols + i*bs; 258 itmp = bs*aj[ai[bn] + i]; 259 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 260 } 261 } 262 } 263 264 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 265 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 266 #ifdef column_search 267 v_i = *v + M*bs; 268 cols_i = *cols + M*bs; 269 for (i=0; i<bn; i++){ /* for each block row */ 270 M = ai[i+1] - ai[i]; 271 for (j=0; j<M; j++){ 272 itmp = aj[ai[i] + j]; /* block column value */ 273 if (itmp == bn){ 274 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 275 for (k=0; k<bs; k++) { 276 *cols_i++ = i*bs+k; 277 *v_i++ = aa_i[k]; 278 } 279 *ncols += bs; 280 break; 281 } 282 } 283 } 284 #endif 285 PetscFunctionReturn(0); 286 } 287 288 #undef __FUNCT__ 289 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 290 PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 291 { 292 PetscErrorCode ierr; 293 294 PetscFunctionBegin; 295 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 296 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 297 PetscFunctionReturn(0); 298 } 299 300 #undef __FUNCT__ 301 #define __FUNCT__ "MatGetRowUpperTriangular_SeqSBAIJ" 302 PetscErrorCode MatGetRowUpperTriangular_SeqSBAIJ(Mat A) 303 { 304 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 305 306 PetscFunctionBegin; 307 a->getrow_utriangular = PETSC_TRUE; 308 PetscFunctionReturn(0); 309 } 310 #undef __FUNCT__ 311 #define __FUNCT__ "MatRestoreRowUpperTriangular_SeqSBAIJ" 312 PetscErrorCode MatRestoreRowUpperTriangular_SeqSBAIJ(Mat A) 313 { 314 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 315 316 PetscFunctionBegin; 317 a->getrow_utriangular = PETSC_FALSE; 318 PetscFunctionReturn(0); 319 } 320 321 #undef __FUNCT__ 322 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 323 PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,Mat *B) 324 { 325 PetscErrorCode ierr; 326 PetscFunctionBegin; 327 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 328 PetscFunctionReturn(0); 329 } 330 331 #undef __FUNCT__ 332 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 333 static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 334 { 335 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 336 PetscErrorCode ierr; 337 PetscInt i,j,bs = A->rmap.bs,k,l,bs2=a->bs2; 338 const char *name; 339 PetscViewerFormat format; 340 341 PetscFunctionBegin; 342 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 343 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 344 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 345 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 346 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 347 if (A->factor && bs>1){ 348 ierr = PetscPrintf(PETSC_COMM_SELF,"Warning: matrix is factored with bs>1. MatView() with PETSC_VIEWER_ASCII_MATLAB is not supported and ignored!\n");CHKERRQ(ierr); 349 PetscFunctionReturn(0); 350 } 351 Mat aij; 352 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 353 ierr = MatView(aij,viewer);CHKERRQ(ierr); 354 ierr = MatDestroy(aij);CHKERRQ(ierr); 355 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 356 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 357 for (i=0; i<a->mbs; i++) { 358 for (j=0; j<bs; j++) { 359 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 360 for (k=a->i[i]; k<a->i[i+1]; k++) { 361 for (l=0; l<bs; l++) { 362 #if defined(PETSC_USE_COMPLEX) 363 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 364 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 365 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 366 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 367 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 368 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 369 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 370 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 371 } 372 #else 373 if (a->a[bs2*k + l*bs + j] != 0.0) { 374 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 375 } 376 #endif 377 } 378 } 379 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 380 } 381 } 382 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 383 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 384 PetscFunctionReturn(0); 385 } else { 386 if (A->factor && bs>1){ 387 ierr = PetscPrintf(PETSC_COMM_SELF,"Warning: matrix is factored. MatView_SeqSBAIJ_ASCII() may not display complete or logically correct entries!\n");CHKERRQ(ierr); 388 } 389 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 390 for (i=0; i<a->mbs; i++) { 391 for (j=0; j<bs; j++) { 392 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 393 for (k=a->i[i]; k<a->i[i+1]; k++) { 394 for (l=0; l<bs; l++) { 395 #if defined(PETSC_USE_COMPLEX) 396 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 397 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 398 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 399 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 400 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 401 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 402 } else { 403 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 404 } 405 #else 406 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 407 #endif 408 } 409 } 410 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 411 } 412 } 413 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 414 } 415 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 416 PetscFunctionReturn(0); 417 } 418 419 #undef __FUNCT__ 420 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 421 static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 422 { 423 Mat A = (Mat) Aa; 424 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 425 PetscErrorCode ierr; 426 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2; 427 PetscMPIInt rank; 428 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 429 MatScalar *aa; 430 MPI_Comm comm; 431 PetscViewer viewer; 432 433 PetscFunctionBegin; 434 /* 435 This is nasty. If this is called from an originally parallel matrix 436 then all processes call this,but only the first has the matrix so the 437 rest should return immediately. 438 */ 439 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 440 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 441 if (rank) PetscFunctionReturn(0); 442 443 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 444 445 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 446 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 447 448 /* loop over matrix elements drawing boxes */ 449 color = PETSC_DRAW_BLUE; 450 for (i=0,row=0; i<mbs; i++,row+=bs) { 451 for (j=a->i[i]; j<a->i[i+1]; j++) { 452 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 453 x_l = a->j[j]*bs; x_r = x_l + 1.0; 454 aa = a->a + j*bs2; 455 for (k=0; k<bs; k++) { 456 for (l=0; l<bs; l++) { 457 if (PetscRealPart(*aa++) >= 0.) continue; 458 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 459 } 460 } 461 } 462 } 463 color = PETSC_DRAW_CYAN; 464 for (i=0,row=0; i<mbs; i++,row+=bs) { 465 for (j=a->i[i]; j<a->i[i+1]; j++) { 466 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 467 x_l = a->j[j]*bs; x_r = x_l + 1.0; 468 aa = a->a + j*bs2; 469 for (k=0; k<bs; k++) { 470 for (l=0; l<bs; l++) { 471 if (PetscRealPart(*aa++) != 0.) continue; 472 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 473 } 474 } 475 } 476 } 477 478 color = PETSC_DRAW_RED; 479 for (i=0,row=0; i<mbs; i++,row+=bs) { 480 for (j=a->i[i]; j<a->i[i+1]; j++) { 481 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 482 x_l = a->j[j]*bs; x_r = x_l + 1.0; 483 aa = a->a + j*bs2; 484 for (k=0; k<bs; k++) { 485 for (l=0; l<bs; l++) { 486 if (PetscRealPart(*aa++) <= 0.) continue; 487 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 488 } 489 } 490 } 491 } 492 PetscFunctionReturn(0); 493 } 494 495 #undef __FUNCT__ 496 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 497 static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 498 { 499 PetscErrorCode ierr; 500 PetscReal xl,yl,xr,yr,w,h; 501 PetscDraw draw; 502 PetscTruth isnull; 503 504 PetscFunctionBegin; 505 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 506 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 507 508 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 509 xr = A->rmap.N; yr = A->rmap.N; h = yr/10.0; w = xr/10.0; 510 xr += w; yr += h; xl = -w; yl = -h; 511 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 512 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 513 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 514 PetscFunctionReturn(0); 515 } 516 517 #undef __FUNCT__ 518 #define __FUNCT__ "MatView_SeqSBAIJ" 519 PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 520 { 521 PetscErrorCode ierr; 522 PetscTruth iascii,isdraw; 523 524 PetscFunctionBegin; 525 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 526 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 527 if (iascii){ 528 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 529 } else if (isdraw) { 530 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 531 } else { 532 Mat B; 533 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 534 ierr = MatView(B,viewer);CHKERRQ(ierr); 535 ierr = MatDestroy(B);CHKERRQ(ierr); 536 } 537 PetscFunctionReturn(0); 538 } 539 540 541 #undef __FUNCT__ 542 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 543 PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 544 { 545 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 546 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 547 PetscInt *ai = a->i,*ailen = a->ilen; 548 PetscInt brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2; 549 MatScalar *ap,*aa = a->a; 550 551 PetscFunctionBegin; 552 for (k=0; k<m; k++) { /* loop over rows */ 553 row = im[k]; brow = row/bs; 554 if (row < 0) {v += n; continue;} /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",row); */ 555 if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1); 556 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 557 nrow = ailen[brow]; 558 for (l=0; l<n; l++) { /* loop over columns */ 559 if (in[l] < 0) {v++; continue;} /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",in[l]); */ 560 if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1); 561 col = in[l] ; 562 bcol = col/bs; 563 cidx = col%bs; 564 ridx = row%bs; 565 high = nrow; 566 low = 0; /* assume unsorted */ 567 while (high-low > 5) { 568 t = (low+high)/2; 569 if (rp[t] > bcol) high = t; 570 else low = t; 571 } 572 for (i=low; i<high; i++) { 573 if (rp[i] > bcol) break; 574 if (rp[i] == bcol) { 575 *v++ = ap[bs2*i+bs*cidx+ridx]; 576 goto finished; 577 } 578 } 579 *v++ = 0.0; 580 finished:; 581 } 582 } 583 PetscFunctionReturn(0); 584 } 585 586 587 #undef __FUNCT__ 588 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 589 PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 590 { 591 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 592 PetscErrorCode ierr; 593 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 594 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 595 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval; 596 PetscTruth roworiented=a->roworiented; 597 const MatScalar *value = v; 598 MatScalar *ap,*aa = a->a,*bap; 599 600 PetscFunctionBegin; 601 if (roworiented) { 602 stepval = (n-1)*bs; 603 } else { 604 stepval = (m-1)*bs; 605 } 606 for (k=0; k<m; k++) { /* loop over added rows */ 607 row = im[k]; 608 if (row < 0) continue; 609 #if defined(PETSC_USE_DEBUG) 610 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 611 #endif 612 rp = aj + ai[row]; 613 ap = aa + bs2*ai[row]; 614 rmax = imax[row]; 615 nrow = ailen[row]; 616 low = 0; 617 high = nrow; 618 for (l=0; l<n; l++) { /* loop over added columns */ 619 if (in[l] < 0) continue; 620 col = in[l]; 621 #if defined(PETSC_USE_DEBUG) 622 if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",col,a->nbs-1); 623 #endif 624 if (col < row) continue; /* ignore lower triangular block */ 625 if (roworiented) { 626 value = v + k*(stepval+bs)*bs + l*bs; 627 } else { 628 value = v + l*(stepval+bs)*bs + k*bs; 629 } 630 if (col <= lastcol) low = 0; else high = nrow; 631 lastcol = col; 632 while (high-low > 7) { 633 t = (low+high)/2; 634 if (rp[t] > col) high = t; 635 else low = t; 636 } 637 for (i=low; i<high; i++) { 638 if (rp[i] > col) break; 639 if (rp[i] == col) { 640 bap = ap + bs2*i; 641 if (roworiented) { 642 if (is == ADD_VALUES) { 643 for (ii=0; ii<bs; ii++,value+=stepval) { 644 for (jj=ii; jj<bs2; jj+=bs) { 645 bap[jj] += *value++; 646 } 647 } 648 } else { 649 for (ii=0; ii<bs; ii++,value+=stepval) { 650 for (jj=ii; jj<bs2; jj+=bs) { 651 bap[jj] = *value++; 652 } 653 } 654 } 655 } else { 656 if (is == ADD_VALUES) { 657 for (ii=0; ii<bs; ii++,value+=stepval) { 658 for (jj=0; jj<bs; jj++) { 659 *bap++ += *value++; 660 } 661 } 662 } else { 663 for (ii=0; ii<bs; ii++,value+=stepval) { 664 for (jj=0; jj<bs; jj++) { 665 *bap++ = *value++; 666 } 667 } 668 } 669 } 670 goto noinsert2; 671 } 672 } 673 if (nonew == 1) goto noinsert2; 674 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 675 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 676 N = nrow++ - 1; high++; 677 /* shift up all the later entries in this row */ 678 for (ii=N; ii>=i; ii--) { 679 rp[ii+1] = rp[ii]; 680 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 681 } 682 if (N >= i) { 683 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 684 } 685 rp[i] = col; 686 bap = ap + bs2*i; 687 if (roworiented) { 688 for (ii=0; ii<bs; ii++,value+=stepval) { 689 for (jj=ii; jj<bs2; jj+=bs) { 690 bap[jj] = *value++; 691 } 692 } 693 } else { 694 for (ii=0; ii<bs; ii++,value+=stepval) { 695 for (jj=0; jj<bs; jj++) { 696 *bap++ = *value++; 697 } 698 } 699 } 700 noinsert2:; 701 low = i; 702 } 703 ailen[row] = nrow; 704 } 705 PetscFunctionReturn(0); 706 } 707 708 #undef __FUNCT__ 709 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 710 PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 711 { 712 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 713 PetscErrorCode ierr; 714 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 715 PetscInt m = A->rmap.N,*ip,N,*ailen = a->ilen; 716 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 717 MatScalar *aa = a->a,*ap; 718 719 PetscFunctionBegin; 720 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 721 722 if (m) rmax = ailen[0]; 723 for (i=1; i<mbs; i++) { 724 /* move each row back by the amount of empty slots (fshift) before it*/ 725 fshift += imax[i-1] - ailen[i-1]; 726 rmax = PetscMax(rmax,ailen[i]); 727 if (fshift) { 728 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 729 N = ailen[i]; 730 for (j=0; j<N; j++) { 731 ip[j-fshift] = ip[j]; 732 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 733 } 734 } 735 ai[i] = ai[i-1] + ailen[i-1]; 736 } 737 if (mbs) { 738 fshift += imax[mbs-1] - ailen[mbs-1]; 739 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 740 } 741 /* reset ilen and imax for each row */ 742 for (i=0; i<mbs; i++) { 743 ailen[i] = imax[i] = ai[i+1] - ai[i]; 744 } 745 a->nz = ai[mbs]; 746 747 /* diagonals may have moved, reset it */ 748 if (a->diag) { 749 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 750 } 751 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->rmap.N,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 752 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 753 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 754 a->reallocs = 0; 755 A->info.nz_unneeded = (PetscReal)fshift*bs2; 756 PetscFunctionReturn(0); 757 } 758 759 /* 760 This function returns an array of flags which indicate the locations of contiguous 761 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 762 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 763 Assume: sizes should be long enough to hold all the values. 764 */ 765 #undef __FUNCT__ 766 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 767 PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 768 { 769 PetscInt i,j,k,row; 770 PetscTruth flg; 771 772 PetscFunctionBegin; 773 for (i=0,j=0; i<n; j++) { 774 row = idx[i]; 775 if (row%bs!=0) { /* Not the begining of a block */ 776 sizes[j] = 1; 777 i++; 778 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 779 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 780 i++; 781 } else { /* Begining of the block, so check if the complete block exists */ 782 flg = PETSC_TRUE; 783 for (k=1; k<bs; k++) { 784 if (row+k != idx[i+k]) { /* break in the block */ 785 flg = PETSC_FALSE; 786 break; 787 } 788 } 789 if (flg) { /* No break in the bs */ 790 sizes[j] = bs; 791 i+= bs; 792 } else { 793 sizes[j] = 1; 794 i++; 795 } 796 } 797 } 798 *bs_max = j; 799 PetscFunctionReturn(0); 800 } 801 802 803 /* Only add/insert a(i,j) with i<=j (blocks). 804 Any a(i,j) with i>j input by user is ingored. 805 */ 806 807 #undef __FUNCT__ 808 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 809 PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 810 { 811 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 812 PetscErrorCode ierr; 813 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 814 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 815 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol; 816 PetscInt ridx,cidx,bs2=a->bs2; 817 MatScalar *ap,value,*aa=a->a,*bap; 818 819 PetscFunctionBegin; 820 for (k=0; k<m; k++) { /* loop over added rows */ 821 row = im[k]; /* row number */ 822 brow = row/bs; /* block row number */ 823 if (row < 0) continue; 824 #if defined(PETSC_USE_DEBUG) 825 if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1); 826 #endif 827 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 828 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 829 rmax = imax[brow]; /* maximum space allocated for this row */ 830 nrow = ailen[brow]; /* actual length of this row */ 831 low = 0; 832 833 for (l=0; l<n; l++) { /* loop over added columns */ 834 if (in[l] < 0) continue; 835 #if defined(PETSC_USE_DEBUG) 836 if (in[l] >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->rmap.N-1); 837 #endif 838 col = in[l]; 839 bcol = col/bs; /* block col number */ 840 841 if (brow > bcol) { 842 if (a->ignore_ltriangular){ 843 continue; /* ignore lower triangular values */ 844 } else { 845 SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR)"); 846 } 847 } 848 849 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 850 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 851 /* element value a(k,l) */ 852 if (roworiented) { 853 value = v[l + k*n]; 854 } else { 855 value = v[k + l*m]; 856 } 857 858 /* move pointer bap to a(k,l) quickly and add/insert value */ 859 if (col <= lastcol) low = 0; high = nrow; 860 lastcol = col; 861 while (high-low > 7) { 862 t = (low+high)/2; 863 if (rp[t] > bcol) high = t; 864 else low = t; 865 } 866 for (i=low; i<high; i++) { 867 if (rp[i] > bcol) break; 868 if (rp[i] == bcol) { 869 bap = ap + bs2*i + bs*cidx + ridx; 870 if (is == ADD_VALUES) *bap += value; 871 else *bap = value; 872 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 873 if (brow == bcol && ridx < cidx){ 874 bap = ap + bs2*i + bs*ridx + cidx; 875 if (is == ADD_VALUES) *bap += value; 876 else *bap = value; 877 } 878 goto noinsert1; 879 } 880 } 881 882 if (nonew == 1) goto noinsert1; 883 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 884 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 885 886 N = nrow++ - 1; high++; 887 /* shift up all the later entries in this row */ 888 for (ii=N; ii>=i; ii--) { 889 rp[ii+1] = rp[ii]; 890 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 891 } 892 if (N>=i) { 893 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 894 } 895 rp[i] = bcol; 896 ap[bs2*i + bs*cidx + ridx] = value; 897 noinsert1:; 898 low = i; 899 } 900 } /* end of loop over added columns */ 901 ailen[brow] = nrow; 902 } /* end of loop over added rows */ 903 PetscFunctionReturn(0); 904 } 905 906 #undef __FUNCT__ 907 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 908 PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info) 909 { 910 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 911 Mat outA; 912 PetscErrorCode ierr; 913 PetscTruth row_identity; 914 915 PetscFunctionBegin; 916 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels=0 is supported for in-place icc"); 917 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 918 if (!row_identity) SETERRQ(PETSC_ERR_SUP,"Matrix reordering is not supported"); 919 if (inA->rmap.bs != 1) SETERRQ1(PETSC_ERR_SUP,"Matrix block size %D is not supported",inA->rmap.bs); /* Need to replace MatCholeskyFactorSymbolic_SeqSBAIJ_MSR()! */ 920 921 outA = inA; 922 inA->factor = FACTOR_CHOLESKY; 923 924 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 925 /* 926 Blocksize < 8 have a special faster factorization/solver 927 for ICC(0) factorization with natural ordering 928 */ 929 switch (inA->rmap.bs){ /* Note: row_identity = PETSC_TRUE! */ 930 case 1: 931 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 932 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 933 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 934 inA->ops->solves = MatSolves_SeqSBAIJ_1; 935 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 936 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 937 ierr = PetscInfo(inA,"Using special in-place natural ordering solvetrans BS=1\n");CHKERRQ(ierr); 938 break; 939 case 2: 940 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 941 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 942 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 943 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_2_NaturalOrdering; 944 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering; 945 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=2\n");CHKERRQ(ierr); 946 break; 947 case 3: 948 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 949 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 950 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 951 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_3_NaturalOrdering; 952 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering; 953 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=3\n");CHKERRQ(ierr); 954 break; 955 case 4: 956 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 957 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 958 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 959 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_4_NaturalOrdering; 960 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering; 961 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=4\n");CHKERRQ(ierr); 962 break; 963 case 5: 964 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 965 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 966 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 967 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_5_NaturalOrdering; 968 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering; 969 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=5\n");CHKERRQ(ierr); 970 break; 971 case 6: 972 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 973 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 974 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 975 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_6_NaturalOrdering; 976 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering; 977 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=6\n");CHKERRQ(ierr); 978 break; 979 case 7: 980 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 981 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 982 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 983 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_7_NaturalOrdering; 984 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering; 985 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=7\n");CHKERRQ(ierr); 986 break; 987 default: 988 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering; 989 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_N_NaturalOrdering; 990 inA->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering; 991 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_N_NaturalOrdering; 992 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering; 993 break; 994 } 995 996 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 997 if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); } 998 a->row = row; 999 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1000 if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); } 1001 a->col = row; 1002 1003 /* Create the invert permutation so that it can be used in MatCholeskyFactorNumeric() */ 1004 if (a->icol) {ierr = ISInvertPermutation(row,PETSC_DECIDE, &a->icol);CHKERRQ(ierr);} 1005 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 1006 1007 if (!a->solve_work) { 1008 ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1009 ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr); 1010 } 1011 1012 ierr = MatCholeskyFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 1013 PetscFunctionReturn(0); 1014 } 1015 1016 EXTERN_C_BEGIN 1017 #undef __FUNCT__ 1018 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1019 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,PetscInt *indices) 1020 { 1021 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1022 PetscInt i,nz,n; 1023 1024 PetscFunctionBegin; 1025 nz = baij->maxnz; 1026 n = mat->cmap.n; 1027 for (i=0; i<nz; i++) { 1028 baij->j[i] = indices[i]; 1029 } 1030 baij->nz = nz; 1031 for (i=0; i<n; i++) { 1032 baij->ilen[i] = baij->imax[i]; 1033 } 1034 PetscFunctionReturn(0); 1035 } 1036 EXTERN_C_END 1037 1038 #undef __FUNCT__ 1039 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1040 /*@ 1041 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1042 in the matrix. 1043 1044 Input Parameters: 1045 + mat - the SeqSBAIJ matrix 1046 - indices - the column indices 1047 1048 Level: advanced 1049 1050 Notes: 1051 This can be called if you have precomputed the nonzero structure of the 1052 matrix and want to provide it to the matrix object to improve the performance 1053 of the MatSetValues() operation. 1054 1055 You MUST have set the correct numbers of nonzeros per row in the call to 1056 MatCreateSeqSBAIJ(), and the columns indices MUST be sorted. 1057 1058 MUST be called before any calls to MatSetValues() 1059 1060 .seealso: MatCreateSeqSBAIJ 1061 @*/ 1062 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1063 { 1064 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1065 1066 PetscFunctionBegin; 1067 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1068 PetscValidPointer(indices,2); 1069 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1070 if (f) { 1071 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1072 } else { 1073 SETERRQ(PETSC_ERR_SUP,"Wrong type of matrix to set column indices"); 1074 } 1075 PetscFunctionReturn(0); 1076 } 1077 1078 #undef __FUNCT__ 1079 #define __FUNCT__ "MatCopy_SeqSBAIJ" 1080 PetscErrorCode MatCopy_SeqSBAIJ(Mat A,Mat B,MatStructure str) 1081 { 1082 PetscErrorCode ierr; 1083 1084 PetscFunctionBegin; 1085 /* If the two matrices have the same copy implementation, use fast copy. */ 1086 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 1087 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1088 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1089 1090 if (a->i[A->rmap.N] != b->i[B->rmap.N]) { 1091 SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 1092 } 1093 ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr); 1094 } else { 1095 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1096 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1097 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1098 } 1099 PetscFunctionReturn(0); 1100 } 1101 1102 #undef __FUNCT__ 1103 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1104 PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A) 1105 { 1106 PetscErrorCode ierr; 1107 1108 PetscFunctionBegin; 1109 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 1110 PetscFunctionReturn(0); 1111 } 1112 1113 #undef __FUNCT__ 1114 #define __FUNCT__ "MatGetArray_SeqSBAIJ" 1115 PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1116 { 1117 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1118 PetscFunctionBegin; 1119 *array = a->a; 1120 PetscFunctionReturn(0); 1121 } 1122 1123 #undef __FUNCT__ 1124 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ" 1125 PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1126 { 1127 PetscFunctionBegin; 1128 PetscFunctionReturn(0); 1129 } 1130 1131 #include "petscblaslapack.h" 1132 #undef __FUNCT__ 1133 #define __FUNCT__ "MatAXPY_SeqSBAIJ" 1134 PetscErrorCode MatAXPY_SeqSBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1135 { 1136 Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data; 1137 PetscErrorCode ierr; 1138 PetscInt i,bs=Y->rmap.bs,bs2,j; 1139 PetscBLASInt bnz = (PetscBLASInt)x->nz,one = 1; 1140 1141 PetscFunctionBegin; 1142 if (str == SAME_NONZERO_PATTERN) { 1143 PetscScalar alpha = a; 1144 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1145 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1146 if (y->xtoy && y->XtoY != X) { 1147 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1148 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1149 } 1150 if (!y->xtoy) { /* get xtoy */ 1151 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1152 y->XtoY = X; 1153 } 1154 bs2 = bs*bs; 1155 for (i=0; i<x->nz; i++) { 1156 j = 0; 1157 while (j < bs2){ 1158 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 1159 j++; 1160 } 1161 } 1162 ierr = PetscInfo3(0,"ratio of nnz_s(X)/nnz_s(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 1163 } else { 1164 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1165 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1166 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1167 } 1168 PetscFunctionReturn(0); 1169 } 1170 1171 #undef __FUNCT__ 1172 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ" 1173 PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg) 1174 { 1175 PetscFunctionBegin; 1176 *flg = PETSC_TRUE; 1177 PetscFunctionReturn(0); 1178 } 1179 1180 #undef __FUNCT__ 1181 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ" 1182 PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg) 1183 { 1184 PetscFunctionBegin; 1185 *flg = PETSC_TRUE; 1186 PetscFunctionReturn(0); 1187 } 1188 1189 #undef __FUNCT__ 1190 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ" 1191 PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg) 1192 { 1193 PetscFunctionBegin; 1194 *flg = PETSC_FALSE; 1195 PetscFunctionReturn(0); 1196 } 1197 1198 #undef __FUNCT__ 1199 #define __FUNCT__ "MatRealPart_SeqSBAIJ" 1200 PetscErrorCode MatRealPart_SeqSBAIJ(Mat A) 1201 { 1202 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1203 PetscInt i,nz = a->bs2*a->i[a->mbs]; 1204 PetscScalar *aa = a->a; 1205 1206 PetscFunctionBegin; 1207 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 1208 PetscFunctionReturn(0); 1209 } 1210 1211 #undef __FUNCT__ 1212 #define __FUNCT__ "MatImaginaryPart_SeqSBAIJ" 1213 PetscErrorCode MatImaginaryPart_SeqSBAIJ(Mat A) 1214 { 1215 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1216 PetscInt i,nz = a->bs2*a->i[a->mbs]; 1217 PetscScalar *aa = a->a; 1218 1219 PetscFunctionBegin; 1220 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 1221 PetscFunctionReturn(0); 1222 } 1223 1224 /* -------------------------------------------------------------------*/ 1225 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1226 MatGetRow_SeqSBAIJ, 1227 MatRestoreRow_SeqSBAIJ, 1228 MatMult_SeqSBAIJ_N, 1229 /* 4*/ MatMultAdd_SeqSBAIJ_N, 1230 MatMult_SeqSBAIJ_N, /* transpose versions are same as non-transpose versions */ 1231 MatMultAdd_SeqSBAIJ_N, 1232 MatSolve_SeqSBAIJ_N, 1233 0, 1234 0, 1235 /*10*/ 0, 1236 0, 1237 MatCholeskyFactor_SeqSBAIJ, 1238 MatRelax_SeqSBAIJ, 1239 MatTranspose_SeqSBAIJ, 1240 /*15*/ MatGetInfo_SeqSBAIJ, 1241 MatEqual_SeqSBAIJ, 1242 MatGetDiagonal_SeqSBAIJ, 1243 MatDiagonalScale_SeqSBAIJ, 1244 MatNorm_SeqSBAIJ, 1245 /*20*/ 0, 1246 MatAssemblyEnd_SeqSBAIJ, 1247 0, 1248 MatSetOption_SeqSBAIJ, 1249 MatZeroEntries_SeqSBAIJ, 1250 /*25*/ 0, 1251 0, 1252 0, 1253 MatCholeskyFactorSymbolic_SeqSBAIJ, 1254 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1255 /*30*/ MatSetUpPreallocation_SeqSBAIJ, 1256 0, 1257 MatICCFactorSymbolic_SeqSBAIJ, 1258 MatGetArray_SeqSBAIJ, 1259 MatRestoreArray_SeqSBAIJ, 1260 /*35*/ MatDuplicate_SeqSBAIJ, 1261 MatForwardSolve_SeqSBAIJ_N, 1262 MatBackwardSolve_SeqSBAIJ_N, 1263 0, 1264 MatICCFactor_SeqSBAIJ, 1265 /*40*/ MatAXPY_SeqSBAIJ, 1266 MatGetSubMatrices_SeqSBAIJ, 1267 MatIncreaseOverlap_SeqSBAIJ, 1268 MatGetValues_SeqSBAIJ, 1269 MatCopy_SeqSBAIJ, 1270 /*45*/ 0, 1271 MatScale_SeqSBAIJ, 1272 0, 1273 0, 1274 0, 1275 /*50*/ 0, 1276 MatGetRowIJ_SeqSBAIJ, 1277 MatRestoreRowIJ_SeqSBAIJ, 1278 0, 1279 0, 1280 /*55*/ 0, 1281 0, 1282 0, 1283 0, 1284 MatSetValuesBlocked_SeqSBAIJ, 1285 /*60*/ MatGetSubMatrix_SeqSBAIJ, 1286 0, 1287 0, 1288 0, 1289 0, 1290 /*65*/ 0, 1291 0, 1292 0, 1293 0, 1294 0, 1295 /*70*/ MatGetRowMaxAbs_SeqSBAIJ, 1296 0, 1297 0, 1298 0, 1299 0, 1300 /*75*/ 0, 1301 0, 1302 0, 1303 0, 1304 0, 1305 /*80*/ 0, 1306 0, 1307 0, 1308 #if !defined(PETSC_USE_COMPLEX) 1309 MatGetInertia_SeqSBAIJ, 1310 #else 1311 0, 1312 #endif 1313 MatLoad_SeqSBAIJ, 1314 /*85*/ MatIsSymmetric_SeqSBAIJ, 1315 MatIsHermitian_SeqSBAIJ, 1316 MatIsStructurallySymmetric_SeqSBAIJ, 1317 0, 1318 0, 1319 /*90*/ 0, 1320 0, 1321 0, 1322 0, 1323 0, 1324 /*95*/ 0, 1325 0, 1326 0, 1327 0, 1328 0, 1329 /*100*/0, 1330 0, 1331 0, 1332 0, 1333 0, 1334 /*105*/0, 1335 MatRealPart_SeqSBAIJ, 1336 MatImaginaryPart_SeqSBAIJ, 1337 MatGetRowUpperTriangular_SeqSBAIJ, 1338 MatRestoreRowUpperTriangular_SeqSBAIJ 1339 }; 1340 1341 EXTERN_C_BEGIN 1342 #undef __FUNCT__ 1343 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1344 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqSBAIJ(Mat mat) 1345 { 1346 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1347 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 1348 PetscErrorCode ierr; 1349 1350 PetscFunctionBegin; 1351 if (aij->nonew != 1) { 1352 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1353 } 1354 1355 /* allocate space for values if not already there */ 1356 if (!aij->saved_values) { 1357 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1358 } 1359 1360 /* copy values over */ 1361 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1362 PetscFunctionReturn(0); 1363 } 1364 EXTERN_C_END 1365 1366 EXTERN_C_BEGIN 1367 #undef __FUNCT__ 1368 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1369 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqSBAIJ(Mat mat) 1370 { 1371 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1372 PetscErrorCode ierr; 1373 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 1374 1375 PetscFunctionBegin; 1376 if (aij->nonew != 1) { 1377 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1378 } 1379 if (!aij->saved_values) { 1380 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 1381 } 1382 1383 /* copy values over */ 1384 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1385 PetscFunctionReturn(0); 1386 } 1387 EXTERN_C_END 1388 1389 EXTERN_C_BEGIN 1390 #undef __FUNCT__ 1391 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ" 1392 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 1393 { 1394 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1395 PetscErrorCode ierr; 1396 PetscInt i,mbs,bs2; 1397 PetscTruth skipallocation = PETSC_FALSE,flg; 1398 1399 PetscFunctionBegin; 1400 B->preallocated = PETSC_TRUE; 1401 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1402 B->rmap.bs = B->cmap.bs = bs; 1403 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 1404 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 1405 1406 mbs = B->rmap.N/bs; 1407 bs2 = bs*bs; 1408 1409 if (mbs*bs != B->rmap.N) { 1410 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1411 } 1412 1413 if (nz == MAT_SKIP_ALLOCATION) { 1414 skipallocation = PETSC_TRUE; 1415 nz = 0; 1416 } 1417 1418 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1419 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 1420 if (nnz) { 1421 for (i=0; i<mbs; i++) { 1422 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 1423 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],mbs); 1424 } 1425 } 1426 1427 ierr = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1428 if (!flg) { 1429 switch (bs) { 1430 case 1: 1431 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1432 B->ops->solve = MatSolve_SeqSBAIJ_1; 1433 B->ops->solves = MatSolves_SeqSBAIJ_1; 1434 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 1435 B->ops->mult = MatMult_SeqSBAIJ_1; 1436 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1437 B->ops->multtranspose = MatMult_SeqSBAIJ_1; 1438 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_1; 1439 break; 1440 case 2: 1441 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1442 B->ops->solve = MatSolve_SeqSBAIJ_2; 1443 B->ops->solvetranspose = MatSolve_SeqSBAIJ_2; 1444 B->ops->mult = MatMult_SeqSBAIJ_2; 1445 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1446 B->ops->multtranspose = MatMult_SeqSBAIJ_2; 1447 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_2; 1448 break; 1449 case 3: 1450 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1451 B->ops->solve = MatSolve_SeqSBAIJ_3; 1452 B->ops->solvetranspose = MatSolve_SeqSBAIJ_3; 1453 B->ops->mult = MatMult_SeqSBAIJ_3; 1454 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1455 B->ops->multtranspose = MatMult_SeqSBAIJ_3; 1456 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_3; 1457 break; 1458 case 4: 1459 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1460 B->ops->solve = MatSolve_SeqSBAIJ_4; 1461 B->ops->solvetranspose = MatSolve_SeqSBAIJ_4; 1462 B->ops->mult = MatMult_SeqSBAIJ_4; 1463 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1464 B->ops->multtranspose = MatMult_SeqSBAIJ_4; 1465 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_4; 1466 break; 1467 case 5: 1468 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1469 B->ops->solve = MatSolve_SeqSBAIJ_5; 1470 B->ops->solvetranspose = MatSolve_SeqSBAIJ_5; 1471 B->ops->mult = MatMult_SeqSBAIJ_5; 1472 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1473 B->ops->multtranspose = MatMult_SeqSBAIJ_5; 1474 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_5; 1475 break; 1476 case 6: 1477 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1478 B->ops->solve = MatSolve_SeqSBAIJ_6; 1479 B->ops->solvetranspose = MatSolve_SeqSBAIJ_6; 1480 B->ops->mult = MatMult_SeqSBAIJ_6; 1481 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1482 B->ops->multtranspose = MatMult_SeqSBAIJ_6; 1483 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_6; 1484 break; 1485 case 7: 1486 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1487 B->ops->solve = MatSolve_SeqSBAIJ_7; 1488 B->ops->solvetranspose = MatSolve_SeqSBAIJ_7; 1489 B->ops->mult = MatMult_SeqSBAIJ_7; 1490 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1491 B->ops->multtranspose = MatMult_SeqSBAIJ_7; 1492 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_7; 1493 break; 1494 } 1495 } 1496 1497 b->mbs = mbs; 1498 b->nbs = mbs; 1499 if (!skipallocation) { 1500 /* b->ilen will count nonzeros in each block row so far. */ 1501 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 1502 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1503 if (!nnz) { 1504 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1505 else if (nz <= 0) nz = 1; 1506 for (i=0; i<mbs; i++) { 1507 b->imax[i] = nz; 1508 } 1509 nz = nz*mbs; /* total nz */ 1510 } else { 1511 nz = 0; 1512 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1513 } 1514 /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1515 1516 /* allocate the matrix space */ 1517 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr); 1518 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1519 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1520 b->singlemalloc = PETSC_TRUE; 1521 1522 /* pointer to beginning of each row */ 1523 b->i[0] = 0; 1524 for (i=1; i<mbs+1; i++) { 1525 b->i[i] = b->i[i-1] + b->imax[i-1]; 1526 } 1527 b->free_a = PETSC_TRUE; 1528 b->free_ij = PETSC_TRUE; 1529 } else { 1530 b->free_a = PETSC_FALSE; 1531 b->free_ij = PETSC_FALSE; 1532 } 1533 1534 B->rmap.bs = bs; 1535 b->bs2 = bs2; 1536 b->nz = 0; 1537 b->maxnz = nz*bs2; 1538 1539 b->inew = 0; 1540 b->jnew = 0; 1541 b->anew = 0; 1542 b->a2anew = 0; 1543 b->permute = PETSC_FALSE; 1544 PetscFunctionReturn(0); 1545 } 1546 EXTERN_C_END 1547 1548 EXTERN_C_BEGIN 1549 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 1550 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqBAIJ(Mat, MatType,MatReuse,Mat*); 1551 EXTERN_C_END 1552 1553 /*MC 1554 MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices, 1555 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1556 1557 Options Database Keys: 1558 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions() 1559 1560 Level: beginner 1561 1562 .seealso: MatCreateSeqSBAIJ 1563 M*/ 1564 1565 EXTERN_C_BEGIN 1566 #undef __FUNCT__ 1567 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1568 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqSBAIJ(Mat B) 1569 { 1570 Mat_SeqSBAIJ *b; 1571 PetscErrorCode ierr; 1572 PetscMPIInt size; 1573 PetscTruth flg; 1574 1575 PetscFunctionBegin; 1576 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1577 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1578 1579 ierr = PetscNewLog(B,Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1580 B->data = (void*)b; 1581 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1582 B->ops->destroy = MatDestroy_SeqSBAIJ; 1583 B->ops->view = MatView_SeqSBAIJ; 1584 B->factor = 0; 1585 B->mapping = 0; 1586 b->row = 0; 1587 b->icol = 0; 1588 b->reallocs = 0; 1589 b->saved_values = 0; 1590 1591 1592 b->sorted = PETSC_FALSE; 1593 b->roworiented = PETSC_TRUE; 1594 b->nonew = 0; 1595 b->diag = 0; 1596 b->solve_work = 0; 1597 b->mult_work = 0; 1598 B->spptr = 0; 1599 b->keepzeroedrows = PETSC_FALSE; 1600 b->xtoy = 0; 1601 b->XtoY = 0; 1602 1603 b->inew = 0; 1604 b->jnew = 0; 1605 b->anew = 0; 1606 b->a2anew = 0; 1607 b->permute = PETSC_FALSE; 1608 1609 b->ignore_ltriangular = PETSC_FALSE; 1610 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_ignore_lower_triangular",&flg);CHKERRQ(ierr); 1611 if (flg) b->ignore_ltriangular = PETSC_TRUE; 1612 1613 b->getrow_utriangular = PETSC_FALSE; 1614 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_getrow_uppertriangular",&flg);CHKERRQ(ierr); 1615 if (flg) b->getrow_utriangular = PETSC_TRUE; 1616 1617 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1618 "MatStoreValues_SeqSBAIJ", 1619 MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1620 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1621 "MatRetrieveValues_SeqSBAIJ", 1622 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1623 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1624 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1625 MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1626 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C", 1627 "MatConvert_SeqSBAIJ_SeqAIJ", 1628 MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr); 1629 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C", 1630 "MatConvert_SeqSBAIJ_SeqBAIJ", 1631 MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr); 1632 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C", 1633 "MatSeqSBAIJSetPreallocation_SeqSBAIJ", 1634 MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr); 1635 1636 B->symmetric = PETSC_TRUE; 1637 B->structurally_symmetric = PETSC_TRUE; 1638 B->symmetric_set = PETSC_TRUE; 1639 B->structurally_symmetric_set = PETSC_TRUE; 1640 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQSBAIJ);CHKERRQ(ierr); 1641 PetscFunctionReturn(0); 1642 } 1643 EXTERN_C_END 1644 1645 #undef __FUNCT__ 1646 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1647 /*@C 1648 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1649 compressed row) format. For good matrix assembly performance the 1650 user should preallocate the matrix storage by setting the parameter nz 1651 (or the array nnz). By setting these parameters accurately, performance 1652 during matrix assembly can be increased by more than a factor of 50. 1653 1654 Collective on Mat 1655 1656 Input Parameters: 1657 + A - the symmetric matrix 1658 . bs - size of block 1659 . nz - number of block nonzeros per block row (same for all rows) 1660 - nnz - array containing the number of block nonzeros in the upper triangular plus 1661 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1662 1663 Options Database Keys: 1664 . -mat_no_unroll - uses code that does not unroll the loops in the 1665 block calculations (much slower) 1666 . -mat_block_size - size of the blocks to use 1667 1668 Level: intermediate 1669 1670 Notes: 1671 Specify the preallocated storage with either nz or nnz (not both). 1672 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1673 allocation. For additional details, see the users manual chapter on 1674 matrices. 1675 1676 If the nnz parameter is given then the nz parameter is ignored 1677 1678 1679 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1680 @*/ 1681 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 1682 { 1683 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 1684 1685 PetscFunctionBegin; 1686 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1687 if (f) { 1688 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 1689 } 1690 PetscFunctionReturn(0); 1691 } 1692 1693 #undef __FUNCT__ 1694 #define __FUNCT__ "MatCreateSeqSBAIJ" 1695 /*@C 1696 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1697 compressed row) format. For good matrix assembly performance the 1698 user should preallocate the matrix storage by setting the parameter nz 1699 (or the array nnz). By setting these parameters accurately, performance 1700 during matrix assembly can be increased by more than a factor of 50. 1701 1702 Collective on MPI_Comm 1703 1704 Input Parameters: 1705 + comm - MPI communicator, set to PETSC_COMM_SELF 1706 . bs - size of block 1707 . m - number of rows, or number of columns 1708 . nz - number of block nonzeros per block row (same for all rows) 1709 - nnz - array containing the number of block nonzeros in the upper triangular plus 1710 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1711 1712 Output Parameter: 1713 . A - the symmetric matrix 1714 1715 Options Database Keys: 1716 . -mat_no_unroll - uses code that does not unroll the loops in the 1717 block calculations (much slower) 1718 . -mat_block_size - size of the blocks to use 1719 1720 Level: intermediate 1721 1722 Notes: 1723 The number of rows and columns must be divisible by blocksize. 1724 This matrix type does not support complex Hermitian operation. 1725 1726 Specify the preallocated storage with either nz or nnz (not both). 1727 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1728 allocation. For additional details, see the users manual chapter on 1729 matrices. 1730 1731 If the nnz parameter is given then the nz parameter is ignored 1732 1733 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1734 @*/ 1735 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 1736 { 1737 PetscErrorCode ierr; 1738 1739 PetscFunctionBegin; 1740 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1741 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 1742 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1743 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 1744 PetscFunctionReturn(0); 1745 } 1746 1747 #undef __FUNCT__ 1748 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1749 PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1750 { 1751 Mat C; 1752 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1753 PetscErrorCode ierr; 1754 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 1755 1756 PetscFunctionBegin; 1757 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1758 1759 *B = 0; 1760 ierr = MatCreate(A->comm,&C);CHKERRQ(ierr); 1761 ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr); 1762 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 1763 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1764 c = (Mat_SeqSBAIJ*)C->data; 1765 1766 C->preallocated = PETSC_TRUE; 1767 C->factor = A->factor; 1768 c->row = 0; 1769 c->icol = 0; 1770 c->saved_values = 0; 1771 c->keepzeroedrows = a->keepzeroedrows; 1772 C->assembled = PETSC_TRUE; 1773 1774 ierr = PetscMapCopy(A->comm,&A->rmap,&C->rmap);CHKERRQ(ierr); 1775 ierr = PetscMapCopy(A->comm,&A->cmap,&C->cmap);CHKERRQ(ierr); 1776 c->bs2 = a->bs2; 1777 c->mbs = a->mbs; 1778 c->nbs = a->nbs; 1779 1780 ierr = PetscMalloc2((mbs+1),PetscInt,&c->imax,(mbs+1),PetscInt,&c->ilen);CHKERRQ(ierr); 1781 for (i=0; i<mbs; i++) { 1782 c->imax[i] = a->imax[i]; 1783 c->ilen[i] = a->ilen[i]; 1784 } 1785 1786 /* allocate the matrix space */ 1787 ierr = PetscMalloc3(bs2*nz,MatScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 1788 c->singlemalloc = PETSC_TRUE; 1789 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1790 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt)));CHKERRQ(ierr); 1791 if (mbs > 0) { 1792 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1793 if (cpvalues == MAT_COPY_VALUES) { 1794 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1795 } else { 1796 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1797 } 1798 } 1799 1800 c->sorted = a->sorted; 1801 c->roworiented = a->roworiented; 1802 c->nonew = a->nonew; 1803 1804 if (a->diag) { 1805 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 1806 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1807 for (i=0; i<mbs; i++) { 1808 c->diag[i] = a->diag[i]; 1809 } 1810 } else c->diag = 0; 1811 c->nz = a->nz; 1812 c->maxnz = a->maxnz; 1813 c->solve_work = 0; 1814 c->mult_work = 0; 1815 c->free_a = PETSC_TRUE; 1816 c->free_ij = PETSC_TRUE; 1817 *B = C; 1818 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1819 PetscFunctionReturn(0); 1820 } 1821 1822 #undef __FUNCT__ 1823 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1824 PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer, MatType type,Mat *A) 1825 { 1826 Mat_SeqSBAIJ *a; 1827 Mat B; 1828 PetscErrorCode ierr; 1829 int fd; 1830 PetscMPIInt size; 1831 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 1832 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 1833 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 1834 PetscInt *masked,nmask,tmp,bs2,ishift; 1835 PetscScalar *aa; 1836 MPI_Comm comm = ((PetscObject)viewer)->comm; 1837 1838 PetscFunctionBegin; 1839 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1840 bs2 = bs*bs; 1841 1842 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1843 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1844 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1845 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1846 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1847 M = header[1]; N = header[2]; nz = header[3]; 1848 1849 if (header[3] < 0) { 1850 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1851 } 1852 1853 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1854 1855 /* 1856 This code adds extra rows to make sure the number of rows is 1857 divisible by the blocksize 1858 */ 1859 mbs = M/bs; 1860 extra_rows = bs - M + bs*(mbs); 1861 if (extra_rows == bs) extra_rows = 0; 1862 else mbs++; 1863 if (extra_rows) { 1864 ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 1865 } 1866 1867 /* read in row lengths */ 1868 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1869 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1870 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1871 1872 /* read in column indices */ 1873 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1874 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1875 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1876 1877 /* loop over row lengths determining block row lengths */ 1878 ierr = PetscMalloc(mbs*sizeof(PetscInt),&s_browlengths);CHKERRQ(ierr); 1879 ierr = PetscMemzero(s_browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1880 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 1881 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1882 masked = mask + mbs; 1883 rowcount = 0; nzcount = 0; 1884 for (i=0; i<mbs; i++) { 1885 nmask = 0; 1886 for (j=0; j<bs; j++) { 1887 kmax = rowlengths[rowcount]; 1888 for (k=0; k<kmax; k++) { 1889 tmp = jj[nzcount++]/bs; /* block col. index */ 1890 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1891 } 1892 rowcount++; 1893 } 1894 s_browlengths[i] += nmask; 1895 1896 /* zero out the mask elements we set */ 1897 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1898 } 1899 1900 /* create our matrix */ 1901 ierr = MatCreate(comm,&B);CHKERRQ(ierr); 1902 ierr = MatSetSizes(B,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 1903 ierr = MatSetType(B,type);CHKERRQ(ierr); 1904 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(B,bs,0,s_browlengths);CHKERRQ(ierr); 1905 a = (Mat_SeqSBAIJ*)B->data; 1906 1907 /* set matrix "i" values */ 1908 a->i[0] = 0; 1909 for (i=1; i<= mbs; i++) { 1910 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1911 a->ilen[i-1] = s_browlengths[i-1]; 1912 } 1913 a->nz = a->i[mbs]; 1914 1915 /* read in nonzero values */ 1916 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1917 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1918 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1919 1920 /* set "a" and "j" values into matrix */ 1921 nzcount = 0; jcount = 0; 1922 for (i=0; i<mbs; i++) { 1923 nzcountb = nzcount; 1924 nmask = 0; 1925 for (j=0; j<bs; j++) { 1926 kmax = rowlengths[i*bs+j]; 1927 for (k=0; k<kmax; k++) { 1928 tmp = jj[nzcount++]/bs; /* block col. index */ 1929 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1930 } 1931 } 1932 /* sort the masked values */ 1933 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1934 1935 /* set "j" values into matrix */ 1936 maskcount = 1; 1937 for (j=0; j<nmask; j++) { 1938 a->j[jcount++] = masked[j]; 1939 mask[masked[j]] = maskcount++; 1940 } 1941 1942 /* set "a" values into matrix */ 1943 ishift = bs2*a->i[i]; 1944 for (j=0; j<bs; j++) { 1945 kmax = rowlengths[i*bs+j]; 1946 for (k=0; k<kmax; k++) { 1947 tmp = jj[nzcountb]/bs ; /* block col. index */ 1948 if (tmp >= i){ 1949 block = mask[tmp] - 1; 1950 point = jj[nzcountb] - bs*tmp; 1951 idx = ishift + bs2*block + j + bs*point; 1952 a->a[idx] = aa[nzcountb]; 1953 } 1954 nzcountb++; 1955 } 1956 } 1957 /* zero out the mask elements we set */ 1958 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1959 } 1960 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1961 1962 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1963 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1964 ierr = PetscFree(aa);CHKERRQ(ierr); 1965 ierr = PetscFree(jj);CHKERRQ(ierr); 1966 ierr = PetscFree(mask);CHKERRQ(ierr); 1967 1968 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1969 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1970 ierr = MatView_Private(B);CHKERRQ(ierr); 1971 *A = B; 1972 PetscFunctionReturn(0); 1973 } 1974 1975 #undef __FUNCT__ 1976 #define __FUNCT__ "MatRelax_SeqSBAIJ" 1977 PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1978 { 1979 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1980 MatScalar *aa=a->a,*v,*v1; 1981 PetscScalar *x,*b,*t,sum,d; 1982 PetscErrorCode ierr; 1983 PetscInt m=a->mbs,bs=A->rmap.bs,*ai=a->i,*aj=a->j; 1984 PetscInt nz,nz1,*vj,*vj1,i; 1985 1986 PetscFunctionBegin; 1987 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 1988 1989 its = its*lits; 1990 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1991 1992 if (bs > 1) 1993 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 1994 1995 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1996 if (xx != bb) { 1997 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1998 } else { 1999 b = x; 2000 } 2001 2002 if (!a->relax_work) { 2003 ierr = PetscMalloc(m*sizeof(PetscScalar),&a->relax_work);CHKERRQ(ierr); 2004 } 2005 t = a->relax_work; 2006 2007 if (flag & SOR_ZERO_INITIAL_GUESS) { 2008 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2009 ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr); 2010 2011 for (i=0; i<m; i++){ 2012 d = *(aa + ai[i]); /* diag[i] */ 2013 v = aa + ai[i] + 1; 2014 vj = aj + ai[i] + 1; 2015 nz = ai[i+1] - ai[i] - 1; 2016 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2017 x[i] = omega*t[i]/d; 2018 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 2019 } 2020 } 2021 2022 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2023 if (!(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP)){ 2024 t = b; 2025 } 2026 2027 for (i=m-1; i>=0; i--){ 2028 d = *(aa + ai[i]); 2029 v = aa + ai[i] + 1; 2030 vj = aj + ai[i] + 1; 2031 nz = ai[i+1] - ai[i] - 1; 2032 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2033 sum = t[i]; 2034 while (nz--) sum -= x[*vj++]*(*v++); 2035 x[i] = (1-omega)*x[i] + omega*sum/d; 2036 } 2037 t = a->relax_work; 2038 } 2039 its--; 2040 } 2041 2042 while (its--) { 2043 /* 2044 forward sweep: 2045 for i=0,...,m-1: 2046 sum[i] = (b[i] - U(i,:)x )/d[i]; 2047 x[i] = (1-omega)x[i] + omega*sum[i]; 2048 b = b - x[i]*U^T(i,:); 2049 2050 */ 2051 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2052 ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr); 2053 2054 for (i=0; i<m; i++){ 2055 d = *(aa + ai[i]); /* diag[i] */ 2056 v = aa + ai[i] + 1; v1=v; 2057 vj = aj + ai[i] + 1; vj1=vj; 2058 nz = ai[i+1] - ai[i] - 1; nz1=nz; 2059 sum = t[i]; 2060 ierr = PetscLogFlops(4*nz-2);CHKERRQ(ierr); 2061 while (nz1--) sum -= (*v1++)*x[*vj1++]; 2062 x[i] = (1-omega)*x[i] + omega*sum/d; 2063 while (nz--) t[*vj++] -= x[i]*(*v++); 2064 } 2065 } 2066 2067 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2068 /* 2069 backward sweep: 2070 b = b - x[i]*U^T(i,:), i=0,...,n-2 2071 for i=m-1,...,0: 2072 sum[i] = (b[i] - U(i,:)x )/d[i]; 2073 x[i] = (1-omega)x[i] + omega*sum[i]; 2074 */ 2075 /* if there was a forward sweep done above then I thing the next two for loops are not needed */ 2076 ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr); 2077 2078 for (i=0; i<m-1; i++){ /* update rhs */ 2079 v = aa + ai[i] + 1; 2080 vj = aj + ai[i] + 1; 2081 nz = ai[i+1] - ai[i] - 1; 2082 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2083 while (nz--) t[*vj++] -= x[i]*(*v++); 2084 } 2085 for (i=m-1; i>=0; i--){ 2086 d = *(aa + ai[i]); 2087 v = aa + ai[i] + 1; 2088 vj = aj + ai[i] + 1; 2089 nz = ai[i+1] - ai[i] - 1; 2090 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2091 sum = t[i]; 2092 while (nz--) sum -= x[*vj++]*(*v++); 2093 x[i] = (1-omega)*x[i] + omega*sum/d; 2094 } 2095 } 2096 } 2097 2098 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2099 if (bb != xx) { 2100 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2101 } 2102 PetscFunctionReturn(0); 2103 } 2104 2105 #undef __FUNCT__ 2106 #define __FUNCT__ "MatCreateSeqSBAIJWithArrays" 2107 /*@ 2108 MatCreateSeqSBAIJWithArrays - Creates an sequential SBAIJ matrix using matrix elements 2109 (upper triangular entries in CSR format) provided by the user. 2110 2111 Collective on MPI_Comm 2112 2113 Input Parameters: 2114 + comm - must be an MPI communicator of size 1 2115 . bs - size of block 2116 . m - number of rows 2117 . n - number of columns 2118 . i - row indices 2119 . j - column indices 2120 - a - matrix values 2121 2122 Output Parameter: 2123 . mat - the matrix 2124 2125 Level: intermediate 2126 2127 Notes: 2128 The i, j, and a arrays are not copied by this routine, the user must free these arrays 2129 once the matrix is destroyed 2130 2131 You cannot set new nonzero locations into this matrix, that will generate an error. 2132 2133 The i and j indices are 0 based 2134 2135 .seealso: MatCreate(), MatCreateMPISBAIJ(), MatCreateSeqSBAIJ() 2136 2137 @*/ 2138 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqSBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 2139 { 2140 PetscErrorCode ierr; 2141 PetscInt ii; 2142 Mat_SeqSBAIJ *sbaij; 2143 2144 PetscFunctionBegin; 2145 if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 2146 if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2147 2148 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 2149 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 2150 ierr = MatSetType(*mat,MATSEQSBAIJ);CHKERRQ(ierr); 2151 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 2152 sbaij = (Mat_SeqSBAIJ*)(*mat)->data; 2153 ierr = PetscMalloc2(m,PetscInt,&sbaij->imax,m,PetscInt,&sbaij->ilen);CHKERRQ(ierr); 2154 2155 sbaij->i = i; 2156 sbaij->j = j; 2157 sbaij->a = a; 2158 sbaij->singlemalloc = PETSC_FALSE; 2159 sbaij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 2160 sbaij->free_a = PETSC_FALSE; 2161 sbaij->free_ij = PETSC_FALSE; 2162 2163 for (ii=0; ii<m; ii++) { 2164 sbaij->ilen[ii] = sbaij->imax[ii] = i[ii+1] - i[ii]; 2165 #if defined(PETSC_USE_DEBUG) 2166 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 2167 #endif 2168 } 2169 #if defined(PETSC_USE_DEBUG) 2170 for (ii=0; ii<sbaij->i[m]; ii++) { 2171 if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 2172 if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 2173 } 2174 #endif 2175 2176 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2177 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2178 PetscFunctionReturn(0); 2179 } 2180 2181 2182 2183 2184 2185