1 /*$Id: baij.c,v 1.233 2001/07/11 19:38:40 buschelm Exp bsmith $*/ 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "petscsys.h" /*I "petscmat.h" I*/ 8 #include "src/mat/impls/baij/seq/baij.h" 9 #include "src/vec/vecimpl.h" 10 #include "src/inline/spops.h" 11 12 /* UGLY, ugly, ugly 13 When MatScalar == Scalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 14 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 15 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 16 converts the entries into single precision and then calls ..._MatScalar() to put them 17 into the single precision data structures. 18 */ 19 #if defined(PETSC_USE_MAT_SINGLE) 20 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 21 #else 22 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 23 #endif 24 25 #define CHUNKSIZE 10 26 27 /* 28 Checks for missing diagonals 29 */ 30 #undef __FUNCT__ 31 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 32 int MatMissingDiagonal_SeqBAIJ(Mat A) 33 { 34 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 35 int *diag,*jj = a->j,i,ierr; 36 37 PetscFunctionBegin; 38 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 39 diag = a->diag; 40 for (i=0; i<a->mbs; i++) { 41 if (jj[diag[i]] != i) { 42 SETERRQ1(1,"Matrix is missing diagonal number %d",i); 43 } 44 } 45 PetscFunctionReturn(0); 46 } 47 48 #undef __FUNCT__ 49 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 50 int MatMarkDiagonal_SeqBAIJ(Mat A) 51 { 52 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 53 int i,j,*diag,m = a->mbs,ierr; 54 55 PetscFunctionBegin; 56 if (a->diag) PetscFunctionReturn(0); 57 58 ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr); 59 PetscLogObjectMemory(A,(m+1)*sizeof(int)); 60 for (i=0; i<m; i++) { 61 diag[i] = a->i[i+1]; 62 for (j=a->i[i]; j<a->i[i+1]; j++) { 63 if (a->j[j] == i) { 64 diag[i] = j; 65 break; 66 } 67 } 68 } 69 a->diag = diag; 70 PetscFunctionReturn(0); 71 } 72 73 74 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 75 76 #undef __FUNCT__ 77 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 78 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 79 { 80 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 81 int ierr,n = a->mbs,i; 82 83 PetscFunctionBegin; 84 *nn = n; 85 if (!ia) PetscFunctionReturn(0); 86 if (symmetric) { 87 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 88 } else if (oshift == 1) { 89 /* temporarily add 1 to i and j indices */ 90 int nz = a->i[n]; 91 for (i=0; i<nz; i++) a->j[i]++; 92 for (i=0; i<n+1; i++) a->i[i]++; 93 *ia = a->i; *ja = a->j; 94 } else { 95 *ia = a->i; *ja = a->j; 96 } 97 98 PetscFunctionReturn(0); 99 } 100 101 #undef __FUNCT__ 102 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 103 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 104 { 105 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 106 int i,n = a->mbs,ierr; 107 108 PetscFunctionBegin; 109 if (!ia) PetscFunctionReturn(0); 110 if (symmetric) { 111 ierr = PetscFree(*ia);CHKERRQ(ierr); 112 ierr = PetscFree(*ja);CHKERRQ(ierr); 113 } else if (oshift == 1) { 114 int nz = a->i[n]-1; 115 for (i=0; i<nz; i++) a->j[i]--; 116 for (i=0; i<n+1; i++) a->i[i]--; 117 } 118 PetscFunctionReturn(0); 119 } 120 121 #undef __FUNCT__ 122 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ" 123 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs) 124 { 125 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 126 127 PetscFunctionBegin; 128 *bs = baij->bs; 129 PetscFunctionReturn(0); 130 } 131 132 133 #undef __FUNCT__ 134 #define __FUNCT__ "MatDestroy_SeqBAIJ" 135 int MatDestroy_SeqBAIJ(Mat A) 136 { 137 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 138 int ierr; 139 140 PetscFunctionBegin; 141 #if defined(PETSC_USE_LOG) 142 PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz); 143 #endif 144 ierr = PetscFree(a->a);CHKERRQ(ierr); 145 if (!a->singlemalloc) { 146 ierr = PetscFree(a->i);CHKERRQ(ierr); 147 ierr = PetscFree(a->j);CHKERRQ(ierr); 148 } 149 if (a->row) { 150 ierr = ISDestroy(a->row);CHKERRQ(ierr); 151 } 152 if (a->col) { 153 ierr = ISDestroy(a->col);CHKERRQ(ierr); 154 } 155 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 156 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 157 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 158 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 159 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 160 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 161 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 162 #if defined(PETSC_USE_MAT_SINGLE) 163 if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);} 164 #endif 165 ierr = PetscFree(a);CHKERRQ(ierr); 166 PetscFunctionReturn(0); 167 } 168 169 #undef __FUNCT__ 170 #define __FUNCT__ "MatSetOption_SeqBAIJ" 171 int MatSetOption_SeqBAIJ(Mat A,MatOption op) 172 { 173 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 174 175 PetscFunctionBegin; 176 switch (op) { 177 case MAT_ROW_ORIENTED: 178 a->roworiented = PETSC_TRUE; 179 break; 180 case MAT_COLUMN_ORIENTED: 181 a->roworiented = PETSC_FALSE; 182 break; 183 case MAT_COLUMNS_SORTED: 184 a->sorted = PETSC_TRUE; 185 break; 186 case MAT_COLUMNS_UNSORTED: 187 a->sorted = PETSC_FALSE; 188 break; 189 case MAT_KEEP_ZEROED_ROWS: 190 a->keepzeroedrows = PETSC_TRUE; 191 break; 192 case MAT_NO_NEW_NONZERO_LOCATIONS: 193 a->nonew = 1; 194 break; 195 case MAT_NEW_NONZERO_LOCATION_ERR: 196 a->nonew = -1; 197 break; 198 case MAT_NEW_NONZERO_ALLOCATION_ERR: 199 a->nonew = -2; 200 break; 201 case MAT_YES_NEW_NONZERO_LOCATIONS: 202 a->nonew = 0; 203 break; 204 case MAT_ROWS_SORTED: 205 case MAT_ROWS_UNSORTED: 206 case MAT_YES_NEW_DIAGONALS: 207 case MAT_IGNORE_OFF_PROC_ENTRIES: 208 case MAT_USE_HASH_TABLE: 209 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 210 break; 211 case MAT_NO_NEW_DIAGONALS: 212 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 213 break; 214 case MAT_USE_SINGLE_PRECISION_SOLVES: 215 #if defined(PETSC_USE_MAT_SINGLE) 216 if (a->bs==4) { 217 PetscTruth sse_enabled; 218 int ierr; 219 ierr = PetscSSEIsEnabled(&sse_enabled);CHKERRQ(ierr); 220 if (sse_enabled) { 221 a->singleprecisionsolves = PETSC_TRUE; 222 } else { 223 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n"); 224 } 225 } else { 226 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n"); 227 } 228 #else 229 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n"); 230 #endif 231 break; 232 default: 233 SETERRQ(PETSC_ERR_SUP,"unknown option"); 234 } 235 PetscFunctionReturn(0); 236 } 237 238 #undef __FUNCT__ 239 #define __FUNCT__ "MatGetOwnershipRange_SeqBAIJ" 240 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 241 { 242 PetscFunctionBegin; 243 if (m) *m = 0; 244 if (n) *n = A->m; 245 PetscFunctionReturn(0); 246 } 247 248 #undef __FUNCT__ 249 #define __FUNCT__ "MatGetRow_SeqBAIJ" 250 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 251 { 252 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 253 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr; 254 MatScalar *aa,*aa_i; 255 Scalar *v_i; 256 257 PetscFunctionBegin; 258 bs = a->bs; 259 ai = a->i; 260 aj = a->j; 261 aa = a->a; 262 bs2 = a->bs2; 263 264 if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range"); 265 266 bn = row/bs; /* Block number */ 267 bp = row % bs; /* Block Position */ 268 M = ai[bn+1] - ai[bn]; 269 *nz = bs*M; 270 271 if (v) { 272 *v = 0; 273 if (*nz) { 274 ierr = PetscMalloc((*nz)*sizeof(Scalar),v);CHKERRQ(ierr); 275 for (i=0; i<M; i++) { /* for each block in the block row */ 276 v_i = *v + i*bs; 277 aa_i = aa + bs2*(ai[bn] + i); 278 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 279 } 280 } 281 } 282 283 if (idx) { 284 *idx = 0; 285 if (*nz) { 286 ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr); 287 for (i=0; i<M; i++) { /* for each block in the block row */ 288 idx_i = *idx + i*bs; 289 itmp = bs*aj[ai[bn] + i]; 290 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 291 } 292 } 293 } 294 PetscFunctionReturn(0); 295 } 296 297 #undef __FUNCT__ 298 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 299 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 300 { 301 int ierr; 302 303 PetscFunctionBegin; 304 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 305 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 306 PetscFunctionReturn(0); 307 } 308 309 #undef __FUNCT__ 310 #define __FUNCT__ "MatTranspose_SeqBAIJ" 311 int MatTranspose_SeqBAIJ(Mat A,Mat *B) 312 { 313 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 314 Mat C; 315 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 316 int *rows,*cols,bs2=a->bs2; 317 Scalar *array; 318 319 PetscFunctionBegin; 320 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 321 ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr); 322 ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr); 323 324 #if defined(PETSC_USE_MAT_SINGLE) 325 ierr = PetscMalloc(a->bs2*a->nz*sizeof(Scalar),&array);CHKERRQ(ierr); 326 for (i=0; i<a->bs2*a->nz; i++) array[i] = (Scalar)a->a[i]; 327 #else 328 array = a->a; 329 #endif 330 331 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 332 ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr); 333 ierr = PetscFree(col);CHKERRQ(ierr); 334 ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr); 335 cols = rows + bs; 336 for (i=0; i<mbs; i++) { 337 cols[0] = i*bs; 338 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 339 len = ai[i+1] - ai[i]; 340 for (j=0; j<len; j++) { 341 rows[0] = (*aj++)*bs; 342 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 343 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 344 array += bs2; 345 } 346 } 347 ierr = PetscFree(rows);CHKERRQ(ierr); 348 #if defined(PETSC_USE_MAT_SINGLE) 349 ierr = PetscFree(array); 350 #endif 351 352 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 353 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 354 355 if (B) { 356 *B = C; 357 } else { 358 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 359 } 360 PetscFunctionReturn(0); 361 } 362 363 #undef __FUNCT__ 364 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 365 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 366 { 367 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 368 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 369 Scalar *aa; 370 FILE *file; 371 372 PetscFunctionBegin; 373 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 374 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 375 col_lens[0] = MAT_COOKIE; 376 377 col_lens[1] = A->m; 378 col_lens[2] = A->n; 379 col_lens[3] = a->nz*bs2; 380 381 /* store lengths of each row and write (including header) to file */ 382 count = 0; 383 for (i=0; i<a->mbs; i++) { 384 for (j=0; j<bs; j++) { 385 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 386 } 387 } 388 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 389 ierr = PetscFree(col_lens);CHKERRQ(ierr); 390 391 /* store column indices (zero start index) */ 392 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 393 count = 0; 394 for (i=0; i<a->mbs; i++) { 395 for (j=0; j<bs; j++) { 396 for (k=a->i[i]; k<a->i[i+1]; k++) { 397 for (l=0; l<bs; l++) { 398 jj[count++] = bs*a->j[k] + l; 399 } 400 } 401 } 402 } 403 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr); 404 ierr = PetscFree(jj);CHKERRQ(ierr); 405 406 /* store nonzero values */ 407 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(Scalar),&aa);CHKERRQ(ierr); 408 count = 0; 409 for (i=0; i<a->mbs; i++) { 410 for (j=0; j<bs; j++) { 411 for (k=a->i[i]; k<a->i[i+1]; k++) { 412 for (l=0; l<bs; l++) { 413 aa[count++] = a->a[bs2*k + l*bs + j]; 414 } 415 } 416 } 417 } 418 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr); 419 ierr = PetscFree(aa);CHKERRQ(ierr); 420 421 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 422 if (file) { 423 fprintf(file,"-matload_block_size %d\n",a->bs); 424 } 425 PetscFunctionReturn(0); 426 } 427 428 #undef __FUNCT__ 429 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 430 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 431 { 432 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 433 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 434 PetscViewerFormat format; 435 436 PetscFunctionBegin; 437 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 438 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_LONG) { 439 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 440 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 441 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 442 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 443 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 444 for (i=0; i<a->mbs; i++) { 445 for (j=0; j<bs; j++) { 446 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 447 for (k=a->i[i]; k<a->i[i+1]; k++) { 448 for (l=0; l<bs; l++) { 449 #if defined(PETSC_USE_COMPLEX) 450 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 451 ierr = PetscViewerASCIIPrintf(viewer," %d %g + %gi",bs*a->j[k]+l, 452 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 453 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 454 ierr = PetscViewerASCIIPrintf(viewer," %d %g - %gi",bs*a->j[k]+l, 455 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 456 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 457 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 458 } 459 #else 460 if (a->a[bs2*k + l*bs + j] != 0.0) { 461 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 462 } 463 #endif 464 } 465 } 466 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 467 } 468 } 469 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 470 } else { 471 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 472 for (i=0; i<a->mbs; i++) { 473 for (j=0; j<bs; j++) { 474 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 475 for (k=a->i[i]; k<a->i[i+1]; k++) { 476 for (l=0; l<bs; l++) { 477 #if defined(PETSC_USE_COMPLEX) 478 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 479 ierr = PetscViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l, 480 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 481 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 482 ierr = PetscViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l, 483 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 484 } else { 485 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 486 } 487 #else 488 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 489 #endif 490 } 491 } 492 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 493 } 494 } 495 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 496 } 497 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 498 PetscFunctionReturn(0); 499 } 500 501 #undef __FUNCT__ 502 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 503 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 504 { 505 Mat A = (Mat) Aa; 506 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 507 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2; 508 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 509 MatScalar *aa; 510 PetscViewer viewer; 511 512 PetscFunctionBegin; 513 514 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 515 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 516 517 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 518 519 /* loop over matrix elements drawing boxes */ 520 color = PETSC_DRAW_BLUE; 521 for (i=0,row=0; i<mbs; i++,row+=bs) { 522 for (j=a->i[i]; j<a->i[i+1]; j++) { 523 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 524 x_l = a->j[j]*bs; x_r = x_l + 1.0; 525 aa = a->a + j*bs2; 526 for (k=0; k<bs; k++) { 527 for (l=0; l<bs; l++) { 528 if (PetscRealPart(*aa++) >= 0.) continue; 529 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 530 } 531 } 532 } 533 } 534 color = PETSC_DRAW_CYAN; 535 for (i=0,row=0; i<mbs; i++,row+=bs) { 536 for (j=a->i[i]; j<a->i[i+1]; j++) { 537 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 538 x_l = a->j[j]*bs; x_r = x_l + 1.0; 539 aa = a->a + j*bs2; 540 for (k=0; k<bs; k++) { 541 for (l=0; l<bs; l++) { 542 if (PetscRealPart(*aa++) != 0.) continue; 543 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 544 } 545 } 546 } 547 } 548 549 color = PETSC_DRAW_RED; 550 for (i=0,row=0; i<mbs; i++,row+=bs) { 551 for (j=a->i[i]; j<a->i[i+1]; j++) { 552 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 553 x_l = a->j[j]*bs; x_r = x_l + 1.0; 554 aa = a->a + j*bs2; 555 for (k=0; k<bs; k++) { 556 for (l=0; l<bs; l++) { 557 if (PetscRealPart(*aa++) <= 0.) continue; 558 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 559 } 560 } 561 } 562 } 563 PetscFunctionReturn(0); 564 } 565 566 #undef __FUNCT__ 567 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 568 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 569 { 570 int ierr; 571 PetscReal xl,yl,xr,yr,w,h; 572 PetscDraw draw; 573 PetscTruth isnull; 574 575 PetscFunctionBegin; 576 577 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 578 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 579 580 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 581 xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0; 582 xr += w; yr += h; xl = -w; yl = -h; 583 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 584 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 585 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 586 PetscFunctionReturn(0); 587 } 588 589 #undef __FUNCT__ 590 #define __FUNCT__ "MatView_SeqBAIJ" 591 int MatView_SeqBAIJ(Mat A,PetscViewer viewer) 592 { 593 int ierr; 594 PetscTruth issocket,isascii,isbinary,isdraw; 595 596 PetscFunctionBegin; 597 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 598 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 599 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 600 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 601 if (issocket) { 602 SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 603 } else if (isascii){ 604 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 605 } else if (isbinary) { 606 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 607 } else if (isdraw) { 608 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 609 } else { 610 SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name); 611 } 612 PetscFunctionReturn(0); 613 } 614 615 616 #undef __FUNCT__ 617 #define __FUNCT__ "MatGetValues_SeqBAIJ" 618 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 619 { 620 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 621 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 622 int *ai = a->i,*ailen = a->ilen; 623 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 624 MatScalar *ap,*aa = a->a,zero = 0.0; 625 626 PetscFunctionBegin; 627 for (k=0; k<m; k++) { /* loop over rows */ 628 row = im[k]; brow = row/bs; 629 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 630 if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 631 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 632 nrow = ailen[brow]; 633 for (l=0; l<n; l++) { /* loop over columns */ 634 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 635 if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 636 col = in[l] ; 637 bcol = col/bs; 638 cidx = col%bs; 639 ridx = row%bs; 640 high = nrow; 641 low = 0; /* assume unsorted */ 642 while (high-low > 5) { 643 t = (low+high)/2; 644 if (rp[t] > bcol) high = t; 645 else low = t; 646 } 647 for (i=low; i<high; i++) { 648 if (rp[i] > bcol) break; 649 if (rp[i] == bcol) { 650 *v++ = ap[bs2*i+bs*cidx+ridx]; 651 goto finished; 652 } 653 } 654 *v++ = zero; 655 finished:; 656 } 657 } 658 PetscFunctionReturn(0); 659 } 660 661 #if defined(PETSC_USE_MAT_SINGLE) 662 #undef __FUNCT__ 663 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 664 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 665 { 666 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 667 int ierr,i,N = m*n*b->bs2; 668 MatScalar *vsingle; 669 670 PetscFunctionBegin; 671 if (N > b->setvalueslen) { 672 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 673 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 674 b->setvalueslen = N; 675 } 676 vsingle = b->setvaluescopy; 677 for (i=0; i<N; i++) { 678 vsingle[i] = v[i]; 679 } 680 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 681 PetscFunctionReturn(0); 682 } 683 #endif 684 685 686 #undef __FUNCT__ 687 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 688 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,int *im,int n,int *in,MatScalar *v,InsertMode is) 689 { 690 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 691 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 692 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 693 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 694 PetscTruth roworiented=a->roworiented; 695 MatScalar *value = v,*ap,*aa = a->a,*bap; 696 697 PetscFunctionBegin; 698 if (roworiented) { 699 stepval = (n-1)*bs; 700 } else { 701 stepval = (m-1)*bs; 702 } 703 for (k=0; k<m; k++) { /* loop over added rows */ 704 row = im[k]; 705 if (row < 0) continue; 706 #if defined(PETSC_USE_BOPT_g) 707 if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 708 #endif 709 rp = aj + ai[row]; 710 ap = aa + bs2*ai[row]; 711 rmax = imax[row]; 712 nrow = ailen[row]; 713 low = 0; 714 for (l=0; l<n; l++) { /* loop over added columns */ 715 if (in[l] < 0) continue; 716 #if defined(PETSC_USE_BOPT_g) 717 if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 718 #endif 719 col = in[l]; 720 if (roworiented) { 721 value = v + k*(stepval+bs)*bs + l*bs; 722 } else { 723 value = v + l*(stepval+bs)*bs + k*bs; 724 } 725 if (!sorted) low = 0; high = nrow; 726 while (high-low > 7) { 727 t = (low+high)/2; 728 if (rp[t] > col) high = t; 729 else low = t; 730 } 731 for (i=low; i<high; i++) { 732 if (rp[i] > col) break; 733 if (rp[i] == col) { 734 bap = ap + bs2*i; 735 if (roworiented) { 736 if (is == ADD_VALUES) { 737 for (ii=0; ii<bs; ii++,value+=stepval) { 738 for (jj=ii; jj<bs2; jj+=bs) { 739 bap[jj] += *value++; 740 } 741 } 742 } else { 743 for (ii=0; ii<bs; ii++,value+=stepval) { 744 for (jj=ii; jj<bs2; jj+=bs) { 745 bap[jj] = *value++; 746 } 747 } 748 } 749 } else { 750 if (is == ADD_VALUES) { 751 for (ii=0; ii<bs; ii++,value+=stepval) { 752 for (jj=0; jj<bs; jj++) { 753 *bap++ += *value++; 754 } 755 } 756 } else { 757 for (ii=0; ii<bs; ii++,value+=stepval) { 758 for (jj=0; jj<bs; jj++) { 759 *bap++ = *value++; 760 } 761 } 762 } 763 } 764 goto noinsert2; 765 } 766 } 767 if (nonew == 1) goto noinsert2; 768 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 769 if (nrow >= rmax) { 770 /* there is no extra room in row, therefore enlarge */ 771 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 772 MatScalar *new_a; 773 774 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 775 776 /* malloc new storage space */ 777 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 778 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 779 new_j = (int*)(new_a + bs2*new_nz); 780 new_i = new_j + new_nz; 781 782 /* copy over old data into new slots */ 783 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 784 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 785 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 786 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 787 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 788 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 789 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 790 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 791 /* free up old matrix storage */ 792 ierr = PetscFree(a->a);CHKERRQ(ierr); 793 if (!a->singlemalloc) { 794 ierr = PetscFree(a->i);CHKERRQ(ierr); 795 ierr = PetscFree(a->j);CHKERRQ(ierr); 796 } 797 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 798 a->singlemalloc = PETSC_TRUE; 799 800 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 801 rmax = imax[row] = imax[row] + CHUNKSIZE; 802 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 803 a->maxnz += bs2*CHUNKSIZE; 804 a->reallocs++; 805 a->nz++; 806 } 807 N = nrow++ - 1; 808 /* shift up all the later entries in this row */ 809 for (ii=N; ii>=i; ii--) { 810 rp[ii+1] = rp[ii]; 811 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 812 } 813 if (N >= i) { 814 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 815 } 816 rp[i] = col; 817 bap = ap + bs2*i; 818 if (roworiented) { 819 for (ii=0; ii<bs; ii++,value+=stepval) { 820 for (jj=ii; jj<bs2; jj+=bs) { 821 bap[jj] = *value++; 822 } 823 } 824 } else { 825 for (ii=0; ii<bs; ii++,value+=stepval) { 826 for (jj=0; jj<bs; jj++) { 827 *bap++ = *value++; 828 } 829 } 830 } 831 noinsert2:; 832 low = i; 833 } 834 ailen[row] = nrow; 835 } 836 PetscFunctionReturn(0); 837 } 838 839 840 #undef __FUNCT__ 841 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 842 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 843 { 844 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 845 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 846 int m = A->m,*ip,N,*ailen = a->ilen; 847 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 848 MatScalar *aa = a->a,*ap; 849 850 PetscFunctionBegin; 851 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 852 853 if (m) rmax = ailen[0]; 854 for (i=1; i<mbs; i++) { 855 /* move each row back by the amount of empty slots (fshift) before it*/ 856 fshift += imax[i-1] - ailen[i-1]; 857 rmax = PetscMax(rmax,ailen[i]); 858 if (fshift) { 859 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 860 N = ailen[i]; 861 for (j=0; j<N; j++) { 862 ip[j-fshift] = ip[j]; 863 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 864 } 865 } 866 ai[i] = ai[i-1] + ailen[i-1]; 867 } 868 if (mbs) { 869 fshift += imax[mbs-1] - ailen[mbs-1]; 870 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 871 } 872 /* reset ilen and imax for each row */ 873 for (i=0; i<mbs; i++) { 874 ailen[i] = imax[i] = ai[i+1] - ai[i]; 875 } 876 a->nz = ai[mbs]; 877 878 /* diagonals may have moved, so kill the diagonal pointers */ 879 if (fshift && a->diag) { 880 ierr = PetscFree(a->diag);CHKERRQ(ierr); 881 PetscLogObjectMemory(A,-(m+1)*sizeof(int)); 882 a->diag = 0; 883 } 884 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 885 m,A->n,a->bs,fshift*bs2,a->nz*bs2); 886 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n", 887 a->reallocs); 888 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 889 a->reallocs = 0; 890 A->info.nz_unneeded = (PetscReal)fshift*bs2; 891 892 PetscFunctionReturn(0); 893 } 894 895 896 897 /* 898 This function returns an array of flags which indicate the locations of contiguous 899 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 900 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 901 Assume: sizes should be long enough to hold all the values. 902 */ 903 #undef __FUNCT__ 904 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 905 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 906 { 907 int i,j,k,row; 908 PetscTruth flg; 909 910 PetscFunctionBegin; 911 for (i=0,j=0; i<n; j++) { 912 row = idx[i]; 913 if (row%bs!=0) { /* Not the begining of a block */ 914 sizes[j] = 1; 915 i++; 916 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 917 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 918 i++; 919 } else { /* Begining of the block, so check if the complete block exists */ 920 flg = PETSC_TRUE; 921 for (k=1; k<bs; k++) { 922 if (row+k != idx[i+k]) { /* break in the block */ 923 flg = PETSC_FALSE; 924 break; 925 } 926 } 927 if (flg == PETSC_TRUE) { /* No break in the bs */ 928 sizes[j] = bs; 929 i+= bs; 930 } else { 931 sizes[j] = 1; 932 i++; 933 } 934 } 935 } 936 *bs_max = j; 937 PetscFunctionReturn(0); 938 } 939 940 #undef __FUNCT__ 941 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 942 int MatZeroRows_SeqBAIJ(Mat A,IS is,Scalar *diag) 943 { 944 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 945 int ierr,i,j,k,count,is_n,*is_idx,*rows; 946 int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max; 947 Scalar zero = 0.0; 948 MatScalar *aa; 949 950 PetscFunctionBegin; 951 /* Make a copy of the IS and sort it */ 952 ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr); 953 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 954 955 /* allocate memory for rows,sizes */ 956 ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr); 957 sizes = rows + is_n; 958 959 /* copy IS values to rows, and sort them */ 960 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 961 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 962 if (baij->keepzeroedrows) { 963 for (i=0; i<is_n; i++) { sizes[i] = 1; } 964 bs_max = is_n; 965 } else { 966 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 967 } 968 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 969 970 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 971 row = rows[j]; 972 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row); 973 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 974 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 975 if (sizes[i] == bs && !baij->keepzeroedrows) { 976 if (diag) { 977 if (baij->ilen[row/bs] > 0) { 978 baij->ilen[row/bs] = 1; 979 baij->j[baij->i[row/bs]] = row/bs; 980 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 981 } 982 /* Now insert all the diagonal values for this bs */ 983 for (k=0; k<bs; k++) { 984 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 985 } 986 } else { /* (!diag) */ 987 baij->ilen[row/bs] = 0; 988 } /* end (!diag) */ 989 } else { /* (sizes[i] != bs) */ 990 #if defined (PETSC_USE_DEBUG) 991 if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1"); 992 #endif 993 for (k=0; k<count; k++) { 994 aa[0] = zero; 995 aa += bs; 996 } 997 if (diag) { 998 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 999 } 1000 } 1001 } 1002 1003 ierr = PetscFree(rows);CHKERRQ(ierr); 1004 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1005 PetscFunctionReturn(0); 1006 } 1007 1008 #undef __FUNCT__ 1009 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1010 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 1011 { 1012 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1013 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 1014 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1015 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 1016 int ridx,cidx,bs2=a->bs2,ierr; 1017 PetscTruth roworiented=a->roworiented; 1018 MatScalar *ap,value,*aa=a->a,*bap; 1019 1020 PetscFunctionBegin; 1021 for (k=0; k<m; k++) { /* loop over added rows */ 1022 row = im[k]; brow = row/bs; 1023 if (row < 0) continue; 1024 #if defined(PETSC_USE_BOPT_g) 1025 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m); 1026 #endif 1027 rp = aj + ai[brow]; 1028 ap = aa + bs2*ai[brow]; 1029 rmax = imax[brow]; 1030 nrow = ailen[brow]; 1031 low = 0; 1032 for (l=0; l<n; l++) { /* loop over added columns */ 1033 if (in[l] < 0) continue; 1034 #if defined(PETSC_USE_BOPT_g) 1035 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n); 1036 #endif 1037 col = in[l]; bcol = col/bs; 1038 ridx = row % bs; cidx = col % bs; 1039 if (roworiented) { 1040 value = v[l + k*n]; 1041 } else { 1042 value = v[k + l*m]; 1043 } 1044 if (!sorted) low = 0; high = nrow; 1045 while (high-low > 7) { 1046 t = (low+high)/2; 1047 if (rp[t] > bcol) high = t; 1048 else low = t; 1049 } 1050 for (i=low; i<high; i++) { 1051 if (rp[i] > bcol) break; 1052 if (rp[i] == bcol) { 1053 bap = ap + bs2*i + bs*cidx + ridx; 1054 if (is == ADD_VALUES) *bap += value; 1055 else *bap = value; 1056 goto noinsert1; 1057 } 1058 } 1059 if (nonew == 1) goto noinsert1; 1060 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 1061 if (nrow >= rmax) { 1062 /* there is no extra room in row, therefore enlarge */ 1063 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1064 MatScalar *new_a; 1065 1066 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 1067 1068 /* Malloc new storage space */ 1069 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1070 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1071 new_j = (int*)(new_a + bs2*new_nz); 1072 new_i = new_j + new_nz; 1073 1074 /* copy over old data into new slots */ 1075 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 1076 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1077 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 1078 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1079 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 1080 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1081 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1082 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1083 /* free up old matrix storage */ 1084 ierr = PetscFree(a->a);CHKERRQ(ierr); 1085 if (!a->singlemalloc) { 1086 ierr = PetscFree(a->i);CHKERRQ(ierr); 1087 ierr = PetscFree(a->j);CHKERRQ(ierr); 1088 } 1089 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1090 a->singlemalloc = PETSC_TRUE; 1091 1092 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1093 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1094 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 1095 a->maxnz += bs2*CHUNKSIZE; 1096 a->reallocs++; 1097 a->nz++; 1098 } 1099 N = nrow++ - 1; 1100 /* shift up all the later entries in this row */ 1101 for (ii=N; ii>=i; ii--) { 1102 rp[ii+1] = rp[ii]; 1103 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1104 } 1105 if (N>=i) { 1106 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1107 } 1108 rp[i] = bcol; 1109 ap[bs2*i + bs*cidx + ridx] = value; 1110 noinsert1:; 1111 low = i; 1112 } 1113 ailen[brow] = nrow; 1114 } 1115 PetscFunctionReturn(0); 1116 } 1117 1118 EXTERN int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,MatLUInfo*,Mat*); 1119 EXTERN int MatLUFactor_SeqBAIJ(Mat,IS,IS,MatLUInfo*); 1120 EXTERN int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int); 1121 EXTERN int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,int,MatReuse,Mat*); 1122 EXTERN int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**); 1123 EXTERN int MatMultTranspose_SeqBAIJ(Mat,Vec,Vec); 1124 EXTERN int MatMultTransposeAdd_SeqBAIJ(Mat,Vec,Vec,Vec); 1125 EXTERN int MatScale_SeqBAIJ(Scalar*,Mat); 1126 EXTERN int MatNorm_SeqBAIJ(Mat,NormType,PetscReal *); 1127 EXTERN int MatEqual_SeqBAIJ(Mat,Mat,PetscTruth*); 1128 EXTERN int MatGetDiagonal_SeqBAIJ(Mat,Vec); 1129 EXTERN int MatDiagonalScale_SeqBAIJ(Mat,Vec,Vec); 1130 EXTERN int MatGetInfo_SeqBAIJ(Mat,MatInfoType,MatInfo *); 1131 EXTERN int MatZeroEntries_SeqBAIJ(Mat); 1132 1133 EXTERN int MatSolve_SeqBAIJ_N(Mat,Vec,Vec); 1134 EXTERN int MatSolve_SeqBAIJ_1(Mat,Vec,Vec); 1135 EXTERN int MatSolve_SeqBAIJ_2(Mat,Vec,Vec); 1136 EXTERN int MatSolve_SeqBAIJ_3(Mat,Vec,Vec); 1137 EXTERN int MatSolve_SeqBAIJ_4(Mat,Vec,Vec); 1138 EXTERN int MatSolve_SeqBAIJ_5(Mat,Vec,Vec); 1139 EXTERN int MatSolve_SeqBAIJ_6(Mat,Vec,Vec); 1140 EXTERN int MatSolve_SeqBAIJ_7(Mat,Vec,Vec); 1141 EXTERN int MatSolveTranspose_SeqBAIJ_7(Mat,Vec,Vec); 1142 EXTERN int MatSolveTranspose_SeqBAIJ_6(Mat,Vec,Vec); 1143 EXTERN int MatSolveTranspose_SeqBAIJ_5(Mat,Vec,Vec); 1144 EXTERN int MatSolveTranspose_SeqBAIJ_4(Mat,Vec,Vec); 1145 EXTERN int MatSolveTranspose_SeqBAIJ_3(Mat,Vec,Vec); 1146 EXTERN int MatSolveTranspose_SeqBAIJ_2(Mat,Vec,Vec); 1147 EXTERN int MatSolveTranspose_SeqBAIJ_1(Mat,Vec,Vec); 1148 1149 EXTERN int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 1150 EXTERN int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 1151 EXTERN int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 1152 EXTERN int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 1153 EXTERN int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 1154 EXTERN int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 1155 EXTERN int MatLUFactorNumeric_SeqBAIJ_6(Mat,Mat*); 1156 1157 EXTERN int MatMult_SeqBAIJ_1(Mat,Vec,Vec); 1158 EXTERN int MatMult_SeqBAIJ_2(Mat,Vec,Vec); 1159 EXTERN int MatMult_SeqBAIJ_3(Mat,Vec,Vec); 1160 EXTERN int MatMult_SeqBAIJ_4(Mat,Vec,Vec); 1161 EXTERN int MatMult_SeqBAIJ_5(Mat,Vec,Vec); 1162 EXTERN int MatMult_SeqBAIJ_6(Mat,Vec,Vec); 1163 EXTERN int MatMult_SeqBAIJ_7(Mat,Vec,Vec); 1164 EXTERN int MatMult_SeqBAIJ_N(Mat,Vec,Vec); 1165 1166 EXTERN int MatMultAdd_SeqBAIJ_1(Mat,Vec,Vec,Vec); 1167 EXTERN int MatMultAdd_SeqBAIJ_2(Mat,Vec,Vec,Vec); 1168 EXTERN int MatMultAdd_SeqBAIJ_3(Mat,Vec,Vec,Vec); 1169 EXTERN int MatMultAdd_SeqBAIJ_4(Mat,Vec,Vec,Vec); 1170 EXTERN int MatMultAdd_SeqBAIJ_5(Mat,Vec,Vec,Vec); 1171 EXTERN int MatMultAdd_SeqBAIJ_6(Mat,Vec,Vec,Vec); 1172 EXTERN int MatMultAdd_SeqBAIJ_7(Mat,Vec,Vec,Vec); 1173 EXTERN int MatMultAdd_SeqBAIJ_N(Mat,Vec,Vec,Vec); 1174 1175 #undef __FUNCT__ 1176 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1177 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info) 1178 { 1179 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1180 Mat outA; 1181 int ierr; 1182 PetscTruth row_identity,col_identity; 1183 1184 PetscFunctionBegin; 1185 if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1186 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1187 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1188 if (!row_identity || !col_identity) { 1189 SETERRQ(1,"Row and column permutations must be identity for in-place ILU"); 1190 } 1191 1192 outA = inA; 1193 inA->factor = FACTOR_LU; 1194 1195 if (!a->diag) { 1196 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1197 } 1198 /* 1199 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1200 for ILU(0) factorization with natural ordering 1201 */ 1202 switch (a->bs) { 1203 case 1: 1204 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1205 inA->ops->solve = MatSolve_SeqBAIJ_1_NaturalOrdering; 1206 inA->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_1_NaturalOrdering; 1207 PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=1\n"); 1208 case 2: 1209 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering; 1210 inA->ops->solve = MatSolve_SeqBAIJ_2_NaturalOrdering; 1211 inA->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_2_NaturalOrdering; 1212 PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1213 break; 1214 case 3: 1215 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering; 1216 inA->ops->solve = MatSolve_SeqBAIJ_3_NaturalOrdering; 1217 inA->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_3_NaturalOrdering; 1218 PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=3\n"); 1219 break; 1220 case 4: 1221 #if defined(PETSC_HAVE_SSE) 1222 { 1223 PetscTruth sse_enabled,single_prec,flg; 1224 ierr = PetscSSEIsEnabled(&sse_enabled);CHKERRQ(ierr); 1225 if (sse_enabled) { 1226 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE; 1227 ierr = PetscOptionsGetLogical(PETSC_NULL,"-matsolve_single_precision",&single_prec,&flg);CHKERRQ(ierr); 1228 if (flg) { 1229 if (single_prec) { 1230 a->singleprecisionsolves = PETSC_TRUE; 1231 } else { 1232 a->singleprecisionsolves = PETSC_FALSE; 1233 } 1234 } 1235 if (a->singleprecisionsolves) { 1236 inA->ops->solve = MatSolve_SeqBAIJ_4_NaturalOrdering_SSE_Demotion; 1237 } else { 1238 inA->ops->solve = MatSolve_SeqBAIJ_4_NaturalOrdering; 1239 } 1240 } else { 1241 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering; 1242 inA->ops->solve = MatSolve_SeqBAIJ_4_NaturalOrdering; 1243 } 1244 } 1245 #else 1246 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering; 1247 inA->ops->solve = MatSolve_SeqBAIJ_4_NaturalOrdering; 1248 #endif 1249 inA->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_4_NaturalOrdering; 1250 PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1251 break; 1252 case 5: 1253 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering; 1254 inA->ops->solve = MatSolve_SeqBAIJ_5_NaturalOrdering; 1255 inA->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_5_NaturalOrdering; 1256 PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1257 break; 1258 case 6: 1259 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering; 1260 inA->ops->solve = MatSolve_SeqBAIJ_6_NaturalOrdering; 1261 inA->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_6_NaturalOrdering; 1262 PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1263 break; 1264 case 7: 1265 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering; 1266 inA->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_7_NaturalOrdering; 1267 inA->ops->solve = MatSolve_SeqBAIJ_7_NaturalOrdering; 1268 PetscLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1269 break; 1270 default: 1271 a->row = row; 1272 a->col = col; 1273 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1274 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1275 1276 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1277 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1278 PetscLogObjectParent(inA,a->icol); 1279 1280 if (!a->solve_work) { 1281 ierr = PetscMalloc((inA->m+a->bs)*sizeof(Scalar),&a->solve_work);CHKERRQ(ierr); 1282 PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(Scalar)); 1283 } 1284 } 1285 1286 ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr); 1287 1288 PetscFunctionReturn(0); 1289 } 1290 #undef __FUNCT__ 1291 #define __FUNCT__ "MatPrintHelp_SeqBAIJ" 1292 int MatPrintHelp_SeqBAIJ(Mat A) 1293 { 1294 static PetscTruth called = PETSC_FALSE; 1295 MPI_Comm comm = A->comm; 1296 int ierr; 1297 1298 PetscFunctionBegin; 1299 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1300 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1301 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1302 PetscFunctionReturn(0); 1303 } 1304 1305 EXTERN_C_BEGIN 1306 #undef __FUNCT__ 1307 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1308 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices) 1309 { 1310 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1311 int i,nz,n; 1312 1313 PetscFunctionBegin; 1314 nz = baij->maxnz; 1315 n = mat->n; 1316 for (i=0; i<nz; i++) { 1317 baij->j[i] = indices[i]; 1318 } 1319 baij->nz = nz; 1320 for (i=0; i<n; i++) { 1321 baij->ilen[i] = baij->imax[i]; 1322 } 1323 1324 PetscFunctionReturn(0); 1325 } 1326 EXTERN_C_END 1327 1328 #undef __FUNCT__ 1329 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1330 /*@ 1331 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1332 in the matrix. 1333 1334 Input Parameters: 1335 + mat - the SeqBAIJ matrix 1336 - indices - the column indices 1337 1338 Level: advanced 1339 1340 Notes: 1341 This can be called if you have precomputed the nonzero structure of the 1342 matrix and want to provide it to the matrix object to improve the performance 1343 of the MatSetValues() operation. 1344 1345 You MUST have set the correct numbers of nonzeros per row in the call to 1346 MatCreateSeqBAIJ(). 1347 1348 MUST be called before any calls to MatSetValues(); 1349 1350 @*/ 1351 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices) 1352 { 1353 int ierr,(*f)(Mat,int *); 1354 1355 PetscFunctionBegin; 1356 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1357 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)())&f);CHKERRQ(ierr); 1358 if (f) { 1359 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1360 } else { 1361 SETERRQ(1,"Wrong type of matrix to set column indices"); 1362 } 1363 PetscFunctionReturn(0); 1364 } 1365 1366 #undef __FUNCT__ 1367 #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1368 int MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1369 { 1370 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1371 int ierr,i,j,n,row,bs,*ai,*aj,mbs; 1372 PetscReal atmp; 1373 Scalar *x,zero = 0.0; 1374 MatScalar *aa; 1375 int ncols,brow,krow,kcol; 1376 1377 PetscFunctionBegin; 1378 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1379 bs = a->bs; 1380 aa = a->a; 1381 ai = a->i; 1382 aj = a->j; 1383 mbs = a->mbs; 1384 1385 ierr = VecSet(&zero,v);CHKERRQ(ierr); 1386 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1387 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1388 if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1389 for (i=0; i<mbs; i++) { 1390 ncols = ai[1] - ai[0]; ai++; 1391 brow = bs*i; 1392 for (j=0; j<ncols; j++){ 1393 /* bcol = bs*(*aj); */ 1394 for (kcol=0; kcol<bs; kcol++){ 1395 for (krow=0; krow<bs; krow++){ 1396 atmp = PetscAbsScalar(*aa); aa++; 1397 row = brow + krow; /* row index */ 1398 /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */ 1399 if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1400 } 1401 } 1402 aj++; 1403 } 1404 } 1405 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1406 PetscFunctionReturn(0); 1407 } 1408 1409 #undef __FUNCT__ 1410 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1411 int MatSetUpPreallocation_SeqBAIJ(Mat A) 1412 { 1413 int ierr; 1414 1415 PetscFunctionBegin; 1416 ierr = MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1417 PetscFunctionReturn(0); 1418 } 1419 1420 #undef __FUNCT__ 1421 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1422 int MatGetArray_SeqBAIJ(Mat A,Scalar **array) 1423 { 1424 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1425 PetscFunctionBegin; 1426 *array = a->a; 1427 PetscFunctionReturn(0); 1428 } 1429 1430 #undef __FUNCT__ 1431 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1432 int MatRestoreArray_SeqBAIJ(Mat A,Scalar **array) 1433 { 1434 PetscFunctionBegin; 1435 PetscFunctionReturn(0); 1436 } 1437 1438 /* -------------------------------------------------------------------*/ 1439 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 1440 MatGetRow_SeqBAIJ, 1441 MatRestoreRow_SeqBAIJ, 1442 MatMult_SeqBAIJ_N, 1443 MatMultAdd_SeqBAIJ_N, 1444 MatMultTranspose_SeqBAIJ, 1445 MatMultTransposeAdd_SeqBAIJ, 1446 MatSolve_SeqBAIJ_N, 1447 0, 1448 0, 1449 0, 1450 MatLUFactor_SeqBAIJ, 1451 0, 1452 0, 1453 MatTranspose_SeqBAIJ, 1454 MatGetInfo_SeqBAIJ, 1455 MatEqual_SeqBAIJ, 1456 MatGetDiagonal_SeqBAIJ, 1457 MatDiagonalScale_SeqBAIJ, 1458 MatNorm_SeqBAIJ, 1459 0, 1460 MatAssemblyEnd_SeqBAIJ, 1461 0, 1462 MatSetOption_SeqBAIJ, 1463 MatZeroEntries_SeqBAIJ, 1464 MatZeroRows_SeqBAIJ, 1465 MatLUFactorSymbolic_SeqBAIJ, 1466 MatLUFactorNumeric_SeqBAIJ_N, 1467 0, 1468 0, 1469 MatSetUpPreallocation_SeqBAIJ, 1470 0, 1471 MatGetOwnershipRange_SeqBAIJ, 1472 MatILUFactorSymbolic_SeqBAIJ, 1473 0, 1474 MatGetArray_SeqBAIJ, 1475 MatRestoreArray_SeqBAIJ, 1476 MatDuplicate_SeqBAIJ, 1477 0, 1478 0, 1479 MatILUFactor_SeqBAIJ, 1480 0, 1481 0, 1482 MatGetSubMatrices_SeqBAIJ, 1483 MatIncreaseOverlap_SeqBAIJ, 1484 MatGetValues_SeqBAIJ, 1485 0, 1486 MatPrintHelp_SeqBAIJ, 1487 MatScale_SeqBAIJ, 1488 0, 1489 0, 1490 0, 1491 MatGetBlockSize_SeqBAIJ, 1492 MatGetRowIJ_SeqBAIJ, 1493 MatRestoreRowIJ_SeqBAIJ, 1494 0, 1495 0, 1496 0, 1497 0, 1498 0, 1499 0, 1500 MatSetValuesBlocked_SeqBAIJ, 1501 MatGetSubMatrix_SeqBAIJ, 1502 MatDestroy_SeqBAIJ, 1503 MatView_SeqBAIJ, 1504 MatGetMaps_Petsc, 1505 0, 1506 0, 1507 0, 1508 0, 1509 0, 1510 0, 1511 MatGetRowMax_SeqBAIJ, 1512 MatConvert_Basic}; 1513 1514 EXTERN_C_BEGIN 1515 #undef __FUNCT__ 1516 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 1517 int MatStoreValues_SeqBAIJ(Mat mat) 1518 { 1519 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1520 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1521 int ierr; 1522 1523 PetscFunctionBegin; 1524 if (aij->nonew != 1) { 1525 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1526 } 1527 1528 /* allocate space for values if not already there */ 1529 if (!aij->saved_values) { 1530 ierr = PetscMalloc((nz+1)*sizeof(Scalar),&aij->saved_values);CHKERRQ(ierr); 1531 } 1532 1533 /* copy values over */ 1534 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(Scalar));CHKERRQ(ierr); 1535 PetscFunctionReturn(0); 1536 } 1537 EXTERN_C_END 1538 1539 EXTERN_C_BEGIN 1540 #undef __FUNCT__ 1541 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 1542 int MatRetrieveValues_SeqBAIJ(Mat mat) 1543 { 1544 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1545 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 1546 1547 PetscFunctionBegin; 1548 if (aij->nonew != 1) { 1549 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1550 } 1551 if (!aij->saved_values) { 1552 SETERRQ(1,"Must call MatStoreValues(A);first"); 1553 } 1554 1555 /* copy values over */ 1556 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(Scalar));CHKERRQ(ierr); 1557 PetscFunctionReturn(0); 1558 } 1559 EXTERN_C_END 1560 1561 EXTERN_C_BEGIN 1562 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*); 1563 EXTERN_C_END 1564 1565 EXTERN_C_BEGIN 1566 #undef __FUNCT__ 1567 #define __FUNCT__ "MatCreate_SeqBAIJ" 1568 int MatCreate_SeqBAIJ(Mat B) 1569 { 1570 int ierr,size; 1571 Mat_SeqBAIJ *b; 1572 1573 PetscFunctionBegin; 1574 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1575 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1576 1577 B->m = B->M = PetscMax(B->m,B->M); 1578 B->n = B->N = PetscMax(B->n,B->N); 1579 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 1580 B->data = (void*)b; 1581 ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 1582 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1583 B->factor = 0; 1584 B->lupivotthreshold = 1.0; 1585 B->mapping = 0; 1586 b->row = 0; 1587 b->col = 0; 1588 b->icol = 0; 1589 b->reallocs = 0; 1590 b->saved_values = 0; 1591 #if defined(PETSC_USE_MAT_SINGLE) 1592 b->setvalueslen = 0; 1593 b->setvaluescopy = PETSC_NULL; 1594 #endif 1595 1596 ierr = MapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr); 1597 ierr = MapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr); 1598 1599 b->sorted = PETSC_FALSE; 1600 b->roworiented = PETSC_TRUE; 1601 b->nonew = 0; 1602 b->diag = 0; 1603 b->solve_work = 0; 1604 b->mult_work = 0; 1605 b->spptr = 0; 1606 B->info.nz_unneeded = (PetscReal)b->maxnz; 1607 b->keepzeroedrows = PETSC_FALSE; 1608 1609 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1610 "MatStoreValues_SeqBAIJ", 1611 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 1612 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1613 "MatRetrieveValues_SeqBAIJ", 1614 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 1615 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 1616 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 1617 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 1618 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_SeqBAIJ_SeqAIJ_C", 1619 "MatConvert_SeqBAIJ_SeqAIJ", 1620 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 1621 PetscFunctionReturn(0); 1622 } 1623 EXTERN_C_END 1624 1625 #undef __FUNCT__ 1626 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 1627 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1628 { 1629 Mat C; 1630 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 1631 int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr; 1632 1633 PetscFunctionBegin; 1634 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1635 1636 *B = 0; 1637 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1638 ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr); 1639 c = (Mat_SeqBAIJ*)C->data; 1640 1641 c->bs = a->bs; 1642 c->bs2 = a->bs2; 1643 c->mbs = a->mbs; 1644 c->nbs = a->nbs; 1645 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1646 1647 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1648 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1649 for (i=0; i<mbs; i++) { 1650 c->imax[i] = a->imax[i]; 1651 c->ilen[i] = a->ilen[i]; 1652 } 1653 1654 /* allocate the matrix space */ 1655 c->singlemalloc = PETSC_TRUE; 1656 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1657 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1658 c->j = (int*)(c->a + nz*bs2); 1659 c->i = c->j + nz; 1660 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1661 if (mbs > 0) { 1662 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1663 if (cpvalues == MAT_COPY_VALUES) { 1664 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1665 } else { 1666 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1667 } 1668 } 1669 1670 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1671 c->sorted = a->sorted; 1672 c->roworiented = a->roworiented; 1673 c->nonew = a->nonew; 1674 1675 if (a->diag) { 1676 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1677 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1678 for (i=0; i<mbs; i++) { 1679 c->diag[i] = a->diag[i]; 1680 } 1681 } else c->diag = 0; 1682 c->nz = a->nz; 1683 c->maxnz = a->maxnz; 1684 c->solve_work = 0; 1685 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1686 c->mult_work = 0; 1687 C->preallocated = PETSC_TRUE; 1688 C->assembled = PETSC_TRUE; 1689 *B = C; 1690 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1691 PetscFunctionReturn(0); 1692 } 1693 1694 EXTERN_C_BEGIN 1695 #undef __FUNCT__ 1696 #define __FUNCT__ "MatLoad_SeqBAIJ" 1697 int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A) 1698 { 1699 Mat_SeqBAIJ *a; 1700 Mat B; 1701 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1702 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1703 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1704 int *masked,nmask,tmp,bs2,ishift; 1705 Scalar *aa; 1706 MPI_Comm comm = ((PetscObject)viewer)->comm; 1707 1708 PetscFunctionBegin; 1709 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1710 bs2 = bs*bs; 1711 1712 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1713 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1714 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1715 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1716 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1717 M = header[1]; N = header[2]; nz = header[3]; 1718 1719 if (header[3] < 0) { 1720 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 1721 } 1722 1723 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1724 1725 /* 1726 This code adds extra rows to make sure the number of rows is 1727 divisible by the blocksize 1728 */ 1729 mbs = M/bs; 1730 extra_rows = bs - M + bs*(mbs); 1731 if (extra_rows == bs) extra_rows = 0; 1732 else mbs++; 1733 if (extra_rows) { 1734 PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 1735 } 1736 1737 /* read in row lengths */ 1738 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1739 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1740 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1741 1742 /* read in column indices */ 1743 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1744 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1745 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1746 1747 /* loop over row lengths determining block row lengths */ 1748 ierr = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr); 1749 ierr = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1750 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1751 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1752 masked = mask + mbs; 1753 rowcount = 0; nzcount = 0; 1754 for (i=0; i<mbs; i++) { 1755 nmask = 0; 1756 for (j=0; j<bs; j++) { 1757 kmax = rowlengths[rowcount]; 1758 for (k=0; k<kmax; k++) { 1759 tmp = jj[nzcount++]/bs; 1760 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1761 } 1762 rowcount++; 1763 } 1764 browlengths[i] += nmask; 1765 /* zero out the mask elements we set */ 1766 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1767 } 1768 1769 /* create our matrix */ 1770 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr); 1771 B = *A; 1772 a = (Mat_SeqBAIJ*)B->data; 1773 1774 /* set matrix "i" values */ 1775 a->i[0] = 0; 1776 for (i=1; i<= mbs; i++) { 1777 a->i[i] = a->i[i-1] + browlengths[i-1]; 1778 a->ilen[i-1] = browlengths[i-1]; 1779 } 1780 a->nz = 0; 1781 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 1782 1783 /* read in nonzero values */ 1784 ierr = PetscMalloc((nz+extra_rows)*sizeof(Scalar),&aa);CHKERRQ(ierr); 1785 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1786 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1787 1788 /* set "a" and "j" values into matrix */ 1789 nzcount = 0; jcount = 0; 1790 for (i=0; i<mbs; i++) { 1791 nzcountb = nzcount; 1792 nmask = 0; 1793 for (j=0; j<bs; j++) { 1794 kmax = rowlengths[i*bs+j]; 1795 for (k=0; k<kmax; k++) { 1796 tmp = jj[nzcount++]/bs; 1797 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1798 } 1799 } 1800 /* sort the masked values */ 1801 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1802 1803 /* set "j" values into matrix */ 1804 maskcount = 1; 1805 for (j=0; j<nmask; j++) { 1806 a->j[jcount++] = masked[j]; 1807 mask[masked[j]] = maskcount++; 1808 } 1809 /* set "a" values into matrix */ 1810 ishift = bs2*a->i[i]; 1811 for (j=0; j<bs; j++) { 1812 kmax = rowlengths[i*bs+j]; 1813 for (k=0; k<kmax; k++) { 1814 tmp = jj[nzcountb]/bs ; 1815 block = mask[tmp] - 1; 1816 point = jj[nzcountb] - bs*tmp; 1817 idx = ishift + bs2*block + j + bs*point; 1818 a->a[idx] = (MatScalar)aa[nzcountb++]; 1819 } 1820 } 1821 /* zero out the mask elements we set */ 1822 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1823 } 1824 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1825 1826 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1827 ierr = PetscFree(browlengths);CHKERRQ(ierr); 1828 ierr = PetscFree(aa);CHKERRQ(ierr); 1829 ierr = PetscFree(jj);CHKERRQ(ierr); 1830 ierr = PetscFree(mask);CHKERRQ(ierr); 1831 1832 B->assembled = PETSC_TRUE; 1833 1834 ierr = MatView_Private(B);CHKERRQ(ierr); 1835 PetscFunctionReturn(0); 1836 } 1837 EXTERN_C_END 1838 1839 #undef __FUNCT__ 1840 #define __FUNCT__ "MatCreateSeqBAIJ" 1841 /*@C 1842 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1843 compressed row) format. For good matrix assembly performance the 1844 user should preallocate the matrix storage by setting the parameter nz 1845 (or the array nnz). By setting these parameters accurately, performance 1846 during matrix assembly can be increased by more than a factor of 50. 1847 1848 Collective on MPI_Comm 1849 1850 Input Parameters: 1851 + comm - MPI communicator, set to PETSC_COMM_SELF 1852 . bs - size of block 1853 . m - number of rows 1854 . n - number of columns 1855 . nz - number of nonzero blocks per block row (same for all rows) 1856 - nnz - array containing the number of nonzero blocks in the various block rows 1857 (possibly different for each block row) or PETSC_NULL 1858 1859 Output Parameter: 1860 . A - the matrix 1861 1862 Options Database Keys: 1863 . -mat_no_unroll - uses code that does not unroll the loops in the 1864 block calculations (much slower) 1865 . -mat_block_size - size of the blocks to use 1866 1867 Level: intermediate 1868 1869 Notes: 1870 A nonzero block is any block that as 1 or more nonzeros in it 1871 1872 The block AIJ format is fully compatible with standard Fortran 77 1873 storage. That is, the stored row and column indices can begin at 1874 either one (as in Fortran) or zero. See the users' manual for details. 1875 1876 Specify the preallocated storage with either nz or nnz (not both). 1877 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1878 allocation. For additional details, see the users manual chapter on 1879 matrices. 1880 1881 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1882 @*/ 1883 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A) 1884 { 1885 int ierr; 1886 1887 PetscFunctionBegin; 1888 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1889 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 1890 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1891 PetscFunctionReturn(0); 1892 } 1893 1894 #undef __FUNCT__ 1895 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 1896 /*@C 1897 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 1898 per row in the matrix. For good matrix assembly performance the 1899 user should preallocate the matrix storage by setting the parameter nz 1900 (or the array nnz). By setting these parameters accurately, performance 1901 during matrix assembly can be increased by more than a factor of 50. 1902 1903 Collective on MPI_Comm 1904 1905 Input Parameters: 1906 + A - the matrix 1907 . bs - size of block 1908 . nz - number of block nonzeros per block row (same for all rows) 1909 - nnz - array containing the number of block nonzeros in the various block rows 1910 (possibly different for each block row) or PETSC_NULL 1911 1912 Options Database Keys: 1913 . -mat_no_unroll - uses code that does not unroll the loops in the 1914 block calculations (much slower) 1915 . -mat_block_size - size of the blocks to use 1916 1917 Level: intermediate 1918 1919 Notes: 1920 The block AIJ format is fully compatible with standard Fortran 77 1921 storage. That is, the stored row and column indices can begin at 1922 either one (as in Fortran) or zero. See the users' manual for details. 1923 1924 Specify the preallocated storage with either nz or nnz (not both). 1925 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1926 allocation. For additional details, see the users manual chapter on 1927 matrices. 1928 1929 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1930 @*/ 1931 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz) 1932 { 1933 Mat_SeqBAIJ *b; 1934 int i,len,ierr,mbs,nbs,bs2,newbs = bs; 1935 PetscTruth flg; 1936 1937 PetscFunctionBegin; 1938 ierr = PetscTypeCompare((PetscObject)B,MATSEQBAIJ,&flg);CHKERRQ(ierr); 1939 if (!flg) PetscFunctionReturn(0); 1940 1941 B->preallocated = PETSC_TRUE; 1942 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr); 1943 if (nnz && newbs != bs) { 1944 SETERRQ(1,"Cannot change blocksize from command line if setting nnz"); 1945 } 1946 bs = newbs; 1947 1948 mbs = B->m/bs; 1949 nbs = B->n/bs; 1950 bs2 = bs*bs; 1951 1952 if (mbs*bs!=B->m || nbs*bs!=B->n) { 1953 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs); 1954 } 1955 1956 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1957 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1958 if (nnz) { 1959 for (i=0; i<mbs; i++) { 1960 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1961 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs); 1962 } 1963 } 1964 1965 b = (Mat_SeqBAIJ*)B->data; 1966 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1967 if (!flg) { 1968 switch (bs) { 1969 case 1: 1970 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1971 B->ops->solve = MatSolve_SeqBAIJ_1; 1972 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_1; 1973 B->ops->mult = MatMult_SeqBAIJ_1; 1974 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 1975 break; 1976 case 2: 1977 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 1978 B->ops->solve = MatSolve_SeqBAIJ_2; 1979 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_2; 1980 B->ops->mult = MatMult_SeqBAIJ_2; 1981 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 1982 break; 1983 case 3: 1984 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 1985 B->ops->solve = MatSolve_SeqBAIJ_3; 1986 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_3; 1987 B->ops->mult = MatMult_SeqBAIJ_3; 1988 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 1989 break; 1990 case 4: 1991 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 1992 B->ops->solve = MatSolve_SeqBAIJ_4; 1993 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_4; 1994 B->ops->mult = MatMult_SeqBAIJ_4; 1995 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 1996 break; 1997 case 5: 1998 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 1999 B->ops->solve = MatSolve_SeqBAIJ_5; 2000 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_5; 2001 B->ops->mult = MatMult_SeqBAIJ_5; 2002 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2003 break; 2004 case 6: 2005 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 2006 B->ops->solve = MatSolve_SeqBAIJ_6; 2007 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_6; 2008 B->ops->mult = MatMult_SeqBAIJ_6; 2009 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2010 break; 2011 case 7: 2012 B->ops->mult = MatMult_SeqBAIJ_7; 2013 B->ops->solve = MatSolve_SeqBAIJ_7; 2014 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_7; 2015 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2016 break; 2017 default: 2018 B->ops->mult = MatMult_SeqBAIJ_N; 2019 B->ops->solve = MatSolve_SeqBAIJ_N; 2020 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2021 break; 2022 } 2023 } 2024 b->bs = bs; 2025 b->mbs = mbs; 2026 b->nbs = nbs; 2027 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 2028 if (!nnz) { 2029 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2030 else if (nz <= 0) nz = 1; 2031 for (i=0; i<mbs; i++) b->imax[i] = nz; 2032 nz = nz*mbs; 2033 } else { 2034 nz = 0; 2035 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2036 } 2037 2038 /* allocate the matrix space */ 2039 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 2040 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 2041 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2042 b->j = (int*)(b->a + nz*bs2); 2043 ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 2044 b->i = b->j + nz; 2045 b->singlemalloc = PETSC_TRUE; 2046 2047 b->i[0] = 0; 2048 for (i=1; i<mbs+1; i++) { 2049 b->i[i] = b->i[i-1] + b->imax[i-1]; 2050 } 2051 2052 /* b->ilen will count nonzeros in each block row so far. */ 2053 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 2054 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 2055 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2056 2057 b->bs = bs; 2058 b->bs2 = bs2; 2059 b->mbs = mbs; 2060 b->nz = 0; 2061 b->maxnz = nz*bs2; 2062 B->info.nz_unneeded = (PetscReal)b->maxnz; 2063 PetscFunctionReturn(0); 2064 } 2065 2066