1 /*$Id: baij.c,v 1.199 2000/02/02 20:09:09 bsmith Exp bsmith $*/ 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "sys.h" 8 #include "src/mat/impls/baij/seq/baij.h" 9 #include "src/vec/vecimpl.h" 10 #include "src/inline/spops.h" 11 12 #define CHUNKSIZE 10 13 14 /* 15 Checks for missing diagonals 16 */ 17 #undef __FUNC__ 18 #define __FUNC__ "MatMissingDiagonal_SeqBAIJ" 19 int MatMissingDiagonal_SeqBAIJ(Mat A) 20 { 21 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 22 int *diag,*jj = a->j,i,ierr; 23 24 PetscFunctionBegin; 25 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 26 diag = a->diag; 27 for (i=0; i<a->mbs; i++) { 28 if (jj[diag[i]] != i) { 29 SETERRQ1(1,1,"Matrix is missing diagonal number %d",i); 30 } 31 } 32 PetscFunctionReturn(0); 33 } 34 35 #undef __FUNC__ 36 #define __FUNC__ "MatMarkDiagonal_SeqBAIJ" 37 int MatMarkDiagonal_SeqBAIJ(Mat A) 38 { 39 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 40 int i,j,*diag,m = a->mbs; 41 42 PetscFunctionBegin; 43 if (a->diag) PetscFunctionReturn(0); 44 45 diag = (int*)PetscMalloc((m+1)*sizeof(int));CHKPTRQ(diag); 46 PLogObjectMemory(A,(m+1)*sizeof(int)); 47 for (i=0; i<m; i++) { 48 diag[i] = a->i[i+1]; 49 for (j=a->i[i]; j<a->i[i+1]; j++) { 50 if (a->j[j] == i) { 51 diag[i] = j; 52 break; 53 } 54 } 55 } 56 a->diag = diag; 57 PetscFunctionReturn(0); 58 } 59 60 61 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 62 63 #undef __FUNC__ 64 #define __FUNC__ "MatGetRowIJ_SeqBAIJ" 65 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 66 { 67 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 68 int ierr,n = a->mbs,i; 69 70 PetscFunctionBegin; 71 *nn = n; 72 if (!ia) PetscFunctionReturn(0); 73 if (symmetric) { 74 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 75 } else if (oshift == 1) { 76 /* temporarily add 1 to i and j indices */ 77 int nz = a->i[n] + 1; 78 for (i=0; i<nz; i++) a->j[i]++; 79 for (i=0; i<n+1; i++) a->i[i]++; 80 *ia = a->i; *ja = a->j; 81 } else { 82 *ia = a->i; *ja = a->j; 83 } 84 85 PetscFunctionReturn(0); 86 } 87 88 #undef __FUNC__ 89 #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ" 90 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 91 PetscTruth *done) 92 { 93 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 94 int i,n = a->mbs,ierr; 95 96 PetscFunctionBegin; 97 if (!ia) PetscFunctionReturn(0); 98 if (symmetric) { 99 ierr = PetscFree(*ia);CHKERRQ(ierr); 100 ierr = PetscFree(*ja);CHKERRQ(ierr); 101 } else if (oshift == 1) { 102 int nz = a->i[n]; 103 for (i=0; i<nz; i++) a->j[i]--; 104 for (i=0; i<n+1; i++) a->i[i]--; 105 } 106 PetscFunctionReturn(0); 107 } 108 109 #undef __FUNC__ 110 #define __FUNC__ "MatGetBlockSize_SeqBAIJ" 111 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs) 112 { 113 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 114 115 PetscFunctionBegin; 116 *bs = baij->bs; 117 PetscFunctionReturn(0); 118 } 119 120 121 #undef __FUNC__ 122 #define __FUNC__ "MatDestroy_SeqBAIJ" 123 int MatDestroy_SeqBAIJ(Mat A) 124 { 125 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 126 int ierr; 127 128 PetscFunctionBegin; 129 if (A->mapping) { 130 ierr = ISLocalToGlobalMappingDestroy(A->mapping);CHKERRQ(ierr); 131 } 132 if (A->bmapping) { 133 ierr = ISLocalToGlobalMappingDestroy(A->bmapping);CHKERRQ(ierr); 134 } 135 if (A->rmap) { 136 ierr = MapDestroy(A->rmap);CHKERRQ(ierr); 137 } 138 if (A->cmap) { 139 ierr = MapDestroy(A->cmap);CHKERRQ(ierr); 140 } 141 #if defined(PETSC_USE_LOG) 142 PLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 143 #endif 144 ierr = PetscFree(a->a);CHKERRQ(ierr); 145 if (!a->singlemalloc) { 146 ierr = PetscFree(a->i);CHKERRQ(ierr); 147 ierr = PetscFree(a->j);CHKERRQ(ierr); 148 } 149 if (a->row) { 150 ierr = ISDestroy(a->row);CHKERRQ(ierr); 151 } 152 if (a->col) { 153 ierr = ISDestroy(a->col);CHKERRQ(ierr); 154 } 155 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 156 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 157 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 158 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 159 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 160 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 161 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 162 ierr = PetscFree(a);CHKERRQ(ierr); 163 PLogObjectDestroy(A); 164 PetscHeaderDestroy(A); 165 PetscFunctionReturn(0); 166 } 167 168 #undef __FUNC__ 169 #define __FUNC__ "MatSetOption_SeqBAIJ" 170 int MatSetOption_SeqBAIJ(Mat A,MatOption op) 171 { 172 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 173 174 PetscFunctionBegin; 175 if (op == MAT_ROW_ORIENTED) a->roworiented = PETSC_TRUE; 176 else if (op == MAT_COLUMN_ORIENTED) a->roworiented = PETSC_FALSE; 177 else if (op == MAT_COLUMNS_SORTED) a->sorted = PETSC_TRUE; 178 else if (op == MAT_COLUMNS_UNSORTED) a->sorted = PETSC_FALSE; 179 else if (op == MAT_KEEP_ZEROED_ROWS) a->keepzeroedrows = PETSC_TRUE; 180 else if (op == MAT_NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 181 else if (op == MAT_NEW_NONZERO_LOCATION_ERR) a->nonew = -1; 182 else if (op == MAT_NEW_NONZERO_ALLOCATION_ERR) a->nonew = -2; 183 else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 184 else if (op == MAT_ROWS_SORTED || 185 op == MAT_ROWS_UNSORTED || 186 op == MAT_SYMMETRIC || 187 op == MAT_STRUCTURALLY_SYMMETRIC || 188 op == MAT_YES_NEW_DIAGONALS || 189 op == MAT_IGNORE_OFF_PROC_ENTRIES || 190 op == MAT_USE_HASH_TABLE) { 191 PLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 192 } else if (op == MAT_NO_NEW_DIAGONALS) { 193 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 194 } else { 195 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 196 } 197 PetscFunctionReturn(0); 198 } 199 200 201 #undef __FUNC__ 202 #define __FUNC__ "MatGetSize_SeqBAIJ" 203 int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 204 { 205 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 206 207 PetscFunctionBegin; 208 if (m) *m = a->m; 209 if (n) *n = a->n; 210 PetscFunctionReturn(0); 211 } 212 213 #undef __FUNC__ 214 #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ" 215 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 216 { 217 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 218 219 PetscFunctionBegin; 220 if (m) *m = 0; 221 if (n) *n = a->m; 222 PetscFunctionReturn(0); 223 } 224 225 #undef __FUNC__ 226 #define __FUNC__ "MatGetRow_SeqBAIJ" 227 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 228 { 229 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 230 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 231 MatScalar *aa,*aa_i; 232 Scalar *v_i; 233 234 PetscFunctionBegin; 235 bs = a->bs; 236 ai = a->i; 237 aj = a->j; 238 aa = a->a; 239 bs2 = a->bs2; 240 241 if (row < 0 || row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row out of range"); 242 243 bn = row/bs; /* Block number */ 244 bp = row % bs; /* Block Position */ 245 M = ai[bn+1] - ai[bn]; 246 *nz = bs*M; 247 248 if (v) { 249 *v = 0; 250 if (*nz) { 251 *v = (Scalar*)PetscMalloc((*nz)*sizeof(Scalar));CHKPTRQ(*v); 252 for (i=0; i<M; i++) { /* for each block in the block row */ 253 v_i = *v + i*bs; 254 aa_i = aa + bs2*(ai[bn] + i); 255 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 256 } 257 } 258 } 259 260 if (idx) { 261 *idx = 0; 262 if (*nz) { 263 *idx = (int*)PetscMalloc((*nz)*sizeof(int));CHKPTRQ(*idx); 264 for (i=0; i<M; i++) { /* for each block in the block row */ 265 idx_i = *idx + i*bs; 266 itmp = bs*aj[ai[bn] + i]; 267 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 268 } 269 } 270 } 271 PetscFunctionReturn(0); 272 } 273 274 #undef __FUNC__ 275 #define __FUNC__ "MatRestoreRow_SeqBAIJ" 276 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 277 { 278 int ierr; 279 280 PetscFunctionBegin; 281 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 282 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 283 PetscFunctionReturn(0); 284 } 285 286 #undef __FUNC__ 287 #define __FUNC__ "MatTranspose_SeqBAIJ" 288 int MatTranspose_SeqBAIJ(Mat A,Mat *B) 289 { 290 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 291 Mat C; 292 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 293 int *rows,*cols,bs2=a->bs2,refct; 294 Scalar *array; 295 296 PetscFunctionBegin; 297 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Square matrix only for in-place"); 298 col = (int*)PetscMalloc((1+nbs)*sizeof(int));CHKPTRQ(col); 299 ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr); 300 301 #if defined(PETSC_USE_MAT_SINGLE) 302 array = (Scalar *)PetscMalloc(a->bs2*a->nz*sizeof(Scalar));CHKPTRQ(array); 303 for (i=0; i<a->bs2*a->nz; i++) array[i] = (Scalar)a->a[i]; 304 #else 305 *array = a->a; 306 #endif 307 308 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 309 ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C);CHKERRQ(ierr); 310 ierr = PetscFree(col);CHKERRQ(ierr); 311 rows = (int*)PetscMalloc(2*bs*sizeof(int));CHKPTRQ(rows); 312 cols = rows + bs; 313 for (i=0; i<mbs; i++) { 314 cols[0] = i*bs; 315 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 316 len = ai[i+1] - ai[i]; 317 for (j=0; j<len; j++) { 318 rows[0] = (*aj++)*bs; 319 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 320 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 321 array += bs2; 322 } 323 } 324 ierr = PetscFree(rows);CHKERRQ(ierr); 325 #if defined(PETSC_USE_MAT_SINGLE) 326 ierr = PetscFree(array); 327 #endif 328 329 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 330 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 331 332 if (B) { 333 *B = C; 334 } else { 335 PetscOps *Abops; 336 MatOps Aops; 337 338 /* This isn't really an in-place transpose */ 339 ierr = PetscFree(a->a);CHKERRQ(ierr); 340 if (!a->singlemalloc) { 341 ierr = PetscFree(a->i);CHKERRQ(ierr); 342 ierr = PetscFree(a->j);CHKERRQ(ierr); 343 } 344 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 345 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 346 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 347 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 348 ierr = PetscFree(a);CHKERRQ(ierr); 349 350 351 ierr = MapDestroy(A->rmap);CHKERRQ(ierr); 352 ierr = MapDestroy(A->cmap);CHKERRQ(ierr); 353 354 /* 355 This is horrible, horrible code. We need to keep the 356 A pointers for the bops and ops but copy everything 357 else from C. 358 */ 359 Abops = A->bops; 360 Aops = A->ops; 361 refct = A->refct; 362 ierr = PetscMemcpy(A,C,sizeof(struct _p_Mat));CHKERRQ(ierr); 363 A->bops = Abops; 364 A->ops = Aops; 365 A->qlist = 0; 366 A->refct = refct; 367 368 PetscHeaderDestroy(C); 369 } 370 PetscFunctionReturn(0); 371 } 372 373 #undef __FUNC__ 374 #define __FUNC__ "MatView_SeqBAIJ_Binary" 375 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 376 { 377 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 378 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 379 Scalar *aa; 380 FILE *file; 381 382 PetscFunctionBegin; 383 ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 384 col_lens = (int*)PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 385 col_lens[0] = MAT_COOKIE; 386 387 col_lens[1] = a->m; 388 col_lens[2] = a->n; 389 col_lens[3] = a->nz*bs2; 390 391 /* store lengths of each row and write (including header) to file */ 392 count = 0; 393 for (i=0; i<a->mbs; i++) { 394 for (j=0; j<bs; j++) { 395 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 396 } 397 } 398 ierr = PetscBinaryWrite(fd,col_lens,4+a->m,PETSC_INT,1);CHKERRQ(ierr); 399 ierr = PetscFree(col_lens);CHKERRQ(ierr); 400 401 /* store column indices (zero start index) */ 402 jj = (int*)PetscMalloc((a->nz+1)*bs2*sizeof(int));CHKPTRQ(jj); 403 count = 0; 404 for (i=0; i<a->mbs; i++) { 405 for (j=0; j<bs; j++) { 406 for (k=a->i[i]; k<a->i[i+1]; k++) { 407 for (l=0; l<bs; l++) { 408 jj[count++] = bs*a->j[k] + l; 409 } 410 } 411 } 412 } 413 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr); 414 ierr = PetscFree(jj);CHKERRQ(ierr); 415 416 /* store nonzero values */ 417 aa = (Scalar*)PetscMalloc((a->nz+1)*bs2*sizeof(Scalar));CHKPTRQ(aa); 418 count = 0; 419 for (i=0; i<a->mbs; i++) { 420 for (j=0; j<bs; j++) { 421 for (k=a->i[i]; k<a->i[i+1]; k++) { 422 for (l=0; l<bs; l++) { 423 aa[count++] = a->a[bs2*k + l*bs + j]; 424 } 425 } 426 } 427 } 428 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr); 429 ierr = PetscFree(aa);CHKERRQ(ierr); 430 431 ierr = ViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 432 if (file) { 433 fprintf(file,"-matload_block_size %d\n",a->bs); 434 } 435 PetscFunctionReturn(0); 436 } 437 438 #undef __FUNC__ 439 #define __FUNC__ "MatView_SeqBAIJ_ASCII" 440 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 441 { 442 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 443 int ierr,i,j,format,bs = a->bs,k,l,bs2=a->bs2; 444 char *outputname; 445 446 PetscFunctionBegin; 447 ierr = ViewerGetOutputname(viewer,&outputname);CHKERRQ(ierr); 448 ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr); 449 if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) { 450 ierr = ViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 451 } else if (format == VIEWER_FORMAT_ASCII_MATLAB) { 452 SETERRQ(PETSC_ERR_SUP,0,"Matlab format not supported"); 453 } else if (format == VIEWER_FORMAT_ASCII_COMMON) { 454 ierr = ViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 455 for (i=0; i<a->mbs; i++) { 456 for (j=0; j<bs; j++) { 457 ierr = ViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 458 for (k=a->i[i]; k<a->i[i+1]; k++) { 459 for (l=0; l<bs; l++) { 460 #if defined(PETSC_USE_COMPLEX) 461 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 462 ierr = ViewerASCIIPrintf(viewer," %d %g + %gi",bs*a->j[k]+l, 463 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 464 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 465 ierr = ViewerASCIIPrintf(viewer," %d %g - %gi",bs*a->j[k]+l, 466 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 467 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 468 ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 469 } 470 #else 471 if (a->a[bs2*k + l*bs + j] != 0.0) { 472 ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 473 } 474 #endif 475 } 476 } 477 ierr = ViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 478 } 479 } 480 ierr = ViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 481 } else { 482 ierr = ViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 483 for (i=0; i<a->mbs; i++) { 484 for (j=0; j<bs; j++) { 485 ierr = ViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 486 for (k=a->i[i]; k<a->i[i+1]; k++) { 487 for (l=0; l<bs; l++) { 488 #if defined(PETSC_USE_COMPLEX) 489 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 490 ierr = ViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l, 491 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 492 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 493 ierr = ViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l, 494 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 495 } else { 496 ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 497 } 498 #else 499 ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 500 #endif 501 } 502 } 503 ierr = ViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 504 } 505 } 506 ierr = ViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 507 } 508 ierr = ViewerFlush(viewer);CHKERRQ(ierr); 509 PetscFunctionReturn(0); 510 } 511 512 #undef __FUNC__ 513 #define __FUNC__ "MatView_SeqBAIJ_Draw_Zoom" 514 static int MatView_SeqBAIJ_Draw_Zoom(Draw draw,void *Aa) 515 { 516 Mat A = (Mat) Aa; 517 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 518 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2; 519 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 520 MatScalar *aa; 521 Viewer viewer; 522 523 PetscFunctionBegin; 524 525 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 526 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 527 528 ierr = DrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 529 530 /* loop over matrix elements drawing boxes */ 531 color = DRAW_BLUE; 532 for (i=0,row=0; i<mbs; i++,row+=bs) { 533 for (j=a->i[i]; j<a->i[i+1]; j++) { 534 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 535 x_l = a->j[j]*bs; x_r = x_l + 1.0; 536 aa = a->a + j*bs2; 537 for (k=0; k<bs; k++) { 538 for (l=0; l<bs; l++) { 539 if (PetscRealPart(*aa++) >= 0.) continue; 540 ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 541 } 542 } 543 } 544 } 545 color = DRAW_CYAN; 546 for (i=0,row=0; i<mbs; i++,row+=bs) { 547 for (j=a->i[i]; j<a->i[i+1]; j++) { 548 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 549 x_l = a->j[j]*bs; x_r = x_l + 1.0; 550 aa = a->a + j*bs2; 551 for (k=0; k<bs; k++) { 552 for (l=0; l<bs; l++) { 553 if (PetscRealPart(*aa++) != 0.) continue; 554 ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 555 } 556 } 557 } 558 } 559 560 color = DRAW_RED; 561 for (i=0,row=0; i<mbs; i++,row+=bs) { 562 for (j=a->i[i]; j<a->i[i+1]; j++) { 563 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 564 x_l = a->j[j]*bs; x_r = x_l + 1.0; 565 aa = a->a + j*bs2; 566 for (k=0; k<bs; k++) { 567 for (l=0; l<bs; l++) { 568 if (PetscRealPart(*aa++) <= 0.) continue; 569 ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 570 } 571 } 572 } 573 } 574 PetscFunctionReturn(0); 575 } 576 577 #undef __FUNC__ 578 #define __FUNC__ "MatView_SeqBAIJ_Draw" 579 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer) 580 { 581 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 582 int ierr; 583 PetscReal xl,yl,xr,yr,w,h; 584 Draw draw; 585 PetscTruth isnull; 586 587 PetscFunctionBegin; 588 589 ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 590 ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 591 592 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 593 xr = a->n; yr = a->m; h = yr/10.0; w = xr/10.0; 594 xr += w; yr += h; xl = -w; yl = -h; 595 ierr = DrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 596 ierr = DrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 597 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 598 PetscFunctionReturn(0); 599 } 600 601 #undef __FUNC__ 602 #define __FUNC__ "MatView_SeqBAIJ" 603 int MatView_SeqBAIJ(Mat A,Viewer viewer) 604 { 605 int ierr; 606 PetscTruth issocket,isascii,isbinary,isdraw; 607 608 PetscFunctionBegin; 609 ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr); 610 ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr); 611 ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr); 612 ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr); 613 if (issocket) { 614 SETERRQ(PETSC_ERR_SUP,0,"Socket viewer not supported"); 615 } else if (isascii){ 616 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 617 } else if (isbinary) { 618 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 619 } else if (isdraw) { 620 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 621 } else { 622 SETERRQ1(1,1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name); 623 } 624 PetscFunctionReturn(0); 625 } 626 627 628 #undef __FUNC__ 629 #define __FUNC__ "MatGetValues_SeqBAIJ" 630 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 631 { 632 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 633 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 634 int *ai = a->i,*ailen = a->ilen; 635 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 636 MatScalar *ap,*aa = a->a,zero = 0.0; 637 638 PetscFunctionBegin; 639 for (k=0; k<m; k++) { /* loop over rows */ 640 row = im[k]; brow = row/bs; 641 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 642 if (row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 643 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 644 nrow = ailen[brow]; 645 for (l=0; l<n; l++) { /* loop over columns */ 646 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 647 if (in[l] >= a->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 648 col = in[l] ; 649 bcol = col/bs; 650 cidx = col%bs; 651 ridx = row%bs; 652 high = nrow; 653 low = 0; /* assume unsorted */ 654 while (high-low > 5) { 655 t = (low+high)/2; 656 if (rp[t] > bcol) high = t; 657 else low = t; 658 } 659 for (i=low; i<high; i++) { 660 if (rp[i] > bcol) break; 661 if (rp[i] == bcol) { 662 *v++ = ap[bs2*i+bs*cidx+ridx]; 663 goto finished; 664 } 665 } 666 *v++ = zero; 667 finished:; 668 } 669 } 670 PetscFunctionReturn(0); 671 } 672 673 674 #undef __FUNC__ 675 #define __FUNC__ "MatSetValuesBlocked_SeqBAIJ" 676 int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 677 { 678 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 679 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 680 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 681 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 682 Scalar *value = v; 683 MatScalar *ap,*aa=a->a,*bap; 684 685 PetscFunctionBegin; 686 if (roworiented) { 687 stepval = (n-1)*bs; 688 } else { 689 stepval = (m-1)*bs; 690 } 691 for (k=0; k<m; k++) { /* loop over added rows */ 692 row = im[k]; 693 if (row < 0) continue; 694 #if defined(PETSC_USE_BOPT_g) 695 if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 696 #endif 697 rp = aj + ai[row]; 698 ap = aa + bs2*ai[row]; 699 rmax = imax[row]; 700 nrow = ailen[row]; 701 low = 0; 702 for (l=0; l<n; l++) { /* loop over added columns */ 703 if (in[l] < 0) continue; 704 #if defined(PETSC_USE_BOPT_g) 705 if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 706 #endif 707 col = in[l]; 708 if (roworiented) { 709 value = v + k*(stepval+bs)*bs + l*bs; 710 } else { 711 value = v + l*(stepval+bs)*bs + k*bs; 712 } 713 if (!sorted) low = 0; high = nrow; 714 while (high-low > 7) { 715 t = (low+high)/2; 716 if (rp[t] > col) high = t; 717 else low = t; 718 } 719 for (i=low; i<high; i++) { 720 if (rp[i] > col) break; 721 if (rp[i] == col) { 722 bap = ap + bs2*i; 723 if (roworiented) { 724 if (is == ADD_VALUES) { 725 for (ii=0; ii<bs; ii++,value+=stepval) { 726 for (jj=ii; jj<bs2; jj+=bs) { 727 bap[jj] += *value++; 728 } 729 } 730 } else { 731 for (ii=0; ii<bs; ii++,value+=stepval) { 732 for (jj=ii; jj<bs2; jj+=bs) { 733 bap[jj] = *value++; 734 } 735 } 736 } 737 } else { 738 if (is == ADD_VALUES) { 739 for (ii=0; ii<bs; ii++,value+=stepval) { 740 for (jj=0; jj<bs; jj++) { 741 *bap++ += *value++; 742 } 743 } 744 } else { 745 for (ii=0; ii<bs; ii++,value+=stepval) { 746 for (jj=0; jj<bs; jj++) { 747 *bap++ = *value++; 748 } 749 } 750 } 751 } 752 goto noinsert2; 753 } 754 } 755 if (nonew == 1) goto noinsert2; 756 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 757 if (nrow >= rmax) { 758 /* there is no extra room in row, therefore enlarge */ 759 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 760 MatScalar *new_a; 761 762 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 763 764 /* malloc new storage space */ 765 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 766 new_a = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); 767 new_j = (int*)(new_a + bs2*new_nz); 768 new_i = new_j + new_nz; 769 770 /* copy over old data into new slots */ 771 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 772 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 773 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 774 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 775 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 776 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 777 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 778 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 779 /* free up old matrix storage */ 780 ierr = PetscFree(a->a);CHKERRQ(ierr); 781 if (!a->singlemalloc) { 782 ierr = PetscFree(a->i);CHKERRQ(ierr); 783 ierr = PetscFree(a->j);CHKERRQ(ierr); 784 } 785 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 786 a->singlemalloc = PETSC_TRUE; 787 788 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 789 rmax = imax[row] = imax[row] + CHUNKSIZE; 790 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 791 a->maxnz += bs2*CHUNKSIZE; 792 a->reallocs++; 793 a->nz++; 794 } 795 N = nrow++ - 1; 796 /* shift up all the later entries in this row */ 797 for (ii=N; ii>=i; ii--) { 798 rp[ii+1] = rp[ii]; 799 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 800 } 801 if (N >= i) { 802 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 803 } 804 rp[i] = col; 805 bap = ap + bs2*i; 806 if (roworiented) { 807 for (ii=0; ii<bs; ii++,value+=stepval) { 808 for (jj=ii; jj<bs2; jj+=bs) { 809 bap[jj] = *value++; 810 } 811 } 812 } else { 813 for (ii=0; ii<bs; ii++,value+=stepval) { 814 for (jj=0; jj<bs; jj++) { 815 *bap++ = *value++; 816 } 817 } 818 } 819 noinsert2:; 820 low = i; 821 } 822 ailen[row] = nrow; 823 } 824 PetscFunctionReturn(0); 825 } 826 827 828 #undef __FUNC__ 829 #define __FUNC__ "MatAssemblyEnd_SeqBAIJ" 830 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 831 { 832 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 833 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 834 int m = a->m,*ip,N,*ailen = a->ilen; 835 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 836 MatScalar *aa = a->a,*ap; 837 838 PetscFunctionBegin; 839 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 840 841 if (m) rmax = ailen[0]; 842 for (i=1; i<mbs; i++) { 843 /* move each row back by the amount of empty slots (fshift) before it*/ 844 fshift += imax[i-1] - ailen[i-1]; 845 rmax = PetscMax(rmax,ailen[i]); 846 if (fshift) { 847 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 848 N = ailen[i]; 849 for (j=0; j<N; j++) { 850 ip[j-fshift] = ip[j]; 851 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 852 } 853 } 854 ai[i] = ai[i-1] + ailen[i-1]; 855 } 856 if (mbs) { 857 fshift += imax[mbs-1] - ailen[mbs-1]; 858 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 859 } 860 /* reset ilen and imax for each row */ 861 for (i=0; i<mbs; i++) { 862 ailen[i] = imax[i] = ai[i+1] - ai[i]; 863 } 864 a->nz = ai[mbs]; 865 866 /* diagonals may have moved, so kill the diagonal pointers */ 867 if (fshift && a->diag) { 868 ierr = PetscFree(a->diag);CHKERRQ(ierr); 869 PLogObjectMemory(A,-(m+1)*sizeof(int)); 870 a->diag = 0; 871 } 872 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 873 m,a->n,a->bs,fshift*bs2,a->nz*bs2); 874 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n", 875 a->reallocs); 876 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 877 a->reallocs = 0; 878 A->info.nz_unneeded = (PetscReal)fshift*bs2; 879 880 PetscFunctionReturn(0); 881 } 882 883 884 885 /* 886 This function returns an array of flags which indicate the locations of contiguous 887 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 888 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 889 Assume: sizes should be long enough to hold all the values. 890 */ 891 #undef __FUNC__ 892 #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Blocks" 893 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 894 { 895 int i,j,k,row; 896 PetscTruth flg; 897 898 PetscFunctionBegin; 899 for (i=0,j=0; i<n; j++) { 900 row = idx[i]; 901 if (row%bs!=0) { /* Not the begining of a block */ 902 sizes[j] = 1; 903 i++; 904 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 905 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 906 i++; 907 } else { /* Begining of the block, so check if the complete block exists */ 908 flg = PETSC_TRUE; 909 for (k=1; k<bs; k++) { 910 if (row+k != idx[i+k]) { /* break in the block */ 911 flg = PETSC_FALSE; 912 break; 913 } 914 } 915 if (flg == PETSC_TRUE) { /* No break in the bs */ 916 sizes[j] = bs; 917 i+= bs; 918 } else { 919 sizes[j] = 1; 920 i++; 921 } 922 } 923 } 924 *bs_max = j; 925 PetscFunctionReturn(0); 926 } 927 928 #undef __FUNC__ 929 #define __FUNC__ "MatZeroRows_SeqBAIJ" 930 int MatZeroRows_SeqBAIJ(Mat A,IS is,Scalar *diag) 931 { 932 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 933 int ierr,i,j,k,count,is_n,*is_idx,*rows; 934 int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max; 935 Scalar zero = 0.0; 936 MatScalar *aa; 937 938 PetscFunctionBegin; 939 /* Make a copy of the IS and sort it */ 940 ierr = ISGetSize(is,&is_n);CHKERRQ(ierr); 941 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 942 943 /* allocate memory for rows,sizes */ 944 rows = (int*)PetscMalloc((3*is_n+1)*sizeof(int));CHKPTRQ(rows); 945 sizes = rows + is_n; 946 947 /* initialize copy IS valurs to rows, and sort them */ 948 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 949 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 950 if (baij->keepzeroedrows) { 951 for (i=0; i<is_n; i++) { sizes[i] = 1; } 952 bs_max = is_n; 953 } else { 954 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 955 } 956 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 957 958 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 959 row = rows[j]; 960 if (row < 0 || row > baij->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"row %d out of range",row); 961 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 962 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 963 if (sizes[i] == bs && !baij->keepzeroedrows) { 964 if (diag) { 965 if (baij->ilen[row/bs] > 0) { 966 baij->ilen[row/bs] = 1; 967 baij->j[baij->i[row/bs]] = row/bs; 968 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 969 } 970 /* Now insert all the diagoanl values for this bs */ 971 for (k=0; k<bs; k++) { 972 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 973 } 974 } else { /* (!diag) */ 975 baij->ilen[row/bs] = 0; 976 } /* end (!diag) */ 977 } else { /* (sizes[i] != bs) */ 978 #if defined (PETSC_USE_DEBUG) 979 if (sizes[i] != 1) SETERRQ(1,0,"Internal Error. Value should be 1"); 980 #endif 981 for (k=0; k<count; k++) { 982 aa[0] = zero; 983 aa+=bs; 984 } 985 if (diag) { 986 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 987 } 988 } 989 } 990 991 ierr = PetscFree(rows);CHKERRQ(ierr); 992 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 993 PetscFunctionReturn(0); 994 } 995 996 #undef __FUNC__ 997 #define __FUNC__ "MatSetValues_SeqBAIJ" 998 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 999 { 1000 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1001 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 1002 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 1003 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 1004 int ridx,cidx,bs2=a->bs2,ierr; 1005 MatScalar *ap,value,*aa=a->a,*bap; 1006 1007 PetscFunctionBegin; 1008 for (k=0; k<m; k++) { /* loop over added rows */ 1009 row = im[k]; brow = row/bs; 1010 if (row < 0) continue; 1011 #if defined(PETSC_USE_BOPT_g) 1012 if (row >= a->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large: row %d max %d",row,a->m); 1013 #endif 1014 rp = aj + ai[brow]; 1015 ap = aa + bs2*ai[brow]; 1016 rmax = imax[brow]; 1017 nrow = ailen[brow]; 1018 low = 0; 1019 for (l=0; l<n; l++) { /* loop over added columns */ 1020 if (in[l] < 0) continue; 1021 #if defined(PETSC_USE_BOPT_g) 1022 if (in[l] >= a->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large: col %d max %d",in[l],a->n); 1023 #endif 1024 col = in[l]; bcol = col/bs; 1025 ridx = row % bs; cidx = col % bs; 1026 if (roworiented) { 1027 value = v[l + k*n]; 1028 } else { 1029 value = v[k + l*m]; 1030 } 1031 if (!sorted) low = 0; high = nrow; 1032 while (high-low > 7) { 1033 t = (low+high)/2; 1034 if (rp[t] > bcol) high = t; 1035 else low = t; 1036 } 1037 for (i=low; i<high; i++) { 1038 if (rp[i] > bcol) break; 1039 if (rp[i] == bcol) { 1040 bap = ap + bs2*i + bs*cidx + ridx; 1041 if (is == ADD_VALUES) *bap += value; 1042 else *bap = value; 1043 goto noinsert1; 1044 } 1045 } 1046 if (nonew == 1) goto noinsert1; 1047 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 1048 if (nrow >= rmax) { 1049 /* there is no extra room in row, therefore enlarge */ 1050 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1051 MatScalar *new_a; 1052 1053 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 1054 1055 /* Malloc new storage space */ 1056 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1057 new_a = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); 1058 new_j = (int*)(new_a + bs2*new_nz); 1059 new_i = new_j + new_nz; 1060 1061 /* copy over old data into new slots */ 1062 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 1063 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1064 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 1065 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1066 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 1067 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1068 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1069 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1070 /* free up old matrix storage */ 1071 ierr = PetscFree(a->a);CHKERRQ(ierr); 1072 if (!a->singlemalloc) { 1073 ierr = PetscFree(a->i);CHKERRQ(ierr); 1074 ierr = PetscFree(a->j);CHKERRQ(ierr); 1075 } 1076 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1077 a->singlemalloc = PETSC_TRUE; 1078 1079 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1080 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1081 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 1082 a->maxnz += bs2*CHUNKSIZE; 1083 a->reallocs++; 1084 a->nz++; 1085 } 1086 N = nrow++ - 1; 1087 /* shift up all the later entries in this row */ 1088 for (ii=N; ii>=i; ii--) { 1089 rp[ii+1] = rp[ii]; 1090 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1091 } 1092 if (N>=i) { 1093 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1094 } 1095 rp[i] = bcol; 1096 ap[bs2*i + bs*cidx + ridx] = value; 1097 noinsert1:; 1098 low = i; 1099 } 1100 ailen[brow] = nrow; 1101 } 1102 PetscFunctionReturn(0); 1103 } 1104 1105 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,PetscReal,Mat*); 1106 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,PetscReal); 1107 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int); 1108 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,int,MatReuse,Mat*); 1109 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**); 1110 extern int MatMultTranspose_SeqBAIJ(Mat,Vec,Vec); 1111 extern int MatMultTransposeAdd_SeqBAIJ(Mat,Vec,Vec,Vec); 1112 extern int MatScale_SeqBAIJ(Scalar*,Mat); 1113 extern int MatNorm_SeqBAIJ(Mat,NormType,PetscReal *); 1114 extern int MatEqual_SeqBAIJ(Mat,Mat,PetscTruth*); 1115 extern int MatGetDiagonal_SeqBAIJ(Mat,Vec); 1116 extern int MatDiagonalScale_SeqBAIJ(Mat,Vec,Vec); 1117 extern int MatGetInfo_SeqBAIJ(Mat,MatInfoType,MatInfo *); 1118 extern int MatZeroEntries_SeqBAIJ(Mat); 1119 1120 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec); 1121 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec); 1122 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec); 1123 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec); 1124 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec); 1125 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec); 1126 extern int MatSolve_SeqBAIJ_6(Mat,Vec,Vec); 1127 extern int MatSolve_SeqBAIJ_7(Mat,Vec,Vec); 1128 extern int MatSolveTranspose_SeqBAIJ_7(Mat,Vec,Vec); 1129 extern int MatSolveTranspose_SeqBAIJ_6(Mat,Vec,Vec); 1130 extern int MatSolveTranspose_SeqBAIJ_5(Mat,Vec,Vec); 1131 extern int MatSolveTranspose_SeqBAIJ_4(Mat,Vec,Vec); 1132 extern int MatSolveTranspose_SeqBAIJ_3(Mat,Vec,Vec); 1133 extern int MatSolveTranspose_SeqBAIJ_2(Mat,Vec,Vec); 1134 extern int MatSolveTranspose_SeqBAIJ_1(Mat,Vec,Vec); 1135 1136 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 1137 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 1138 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 1139 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 1140 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 1141 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 1142 extern int MatLUFactorNumeric_SeqBAIJ_6(Mat,Mat*); 1143 1144 extern int MatMult_SeqBAIJ_1(Mat,Vec,Vec); 1145 extern int MatMult_SeqBAIJ_2(Mat,Vec,Vec); 1146 extern int MatMult_SeqBAIJ_3(Mat,Vec,Vec); 1147 extern int MatMult_SeqBAIJ_4(Mat,Vec,Vec); 1148 extern int MatMult_SeqBAIJ_5(Mat,Vec,Vec); 1149 extern int MatMult_SeqBAIJ_6(Mat,Vec,Vec); 1150 extern int MatMult_SeqBAIJ_7(Mat,Vec,Vec); 1151 extern int MatMult_SeqBAIJ_N(Mat,Vec,Vec); 1152 1153 extern int MatMultAdd_SeqBAIJ_1(Mat,Vec,Vec,Vec); 1154 extern int MatMultAdd_SeqBAIJ_2(Mat,Vec,Vec,Vec); 1155 extern int MatMultAdd_SeqBAIJ_3(Mat,Vec,Vec,Vec); 1156 extern int MatMultAdd_SeqBAIJ_4(Mat,Vec,Vec,Vec); 1157 extern int MatMultAdd_SeqBAIJ_5(Mat,Vec,Vec,Vec); 1158 extern int MatMultAdd_SeqBAIJ_6(Mat,Vec,Vec,Vec); 1159 extern int MatMultAdd_SeqBAIJ_7(Mat,Vec,Vec,Vec); 1160 extern int MatMultAdd_SeqBAIJ_N(Mat,Vec,Vec,Vec); 1161 1162 #undef __FUNC__ 1163 #define __FUNC__ "MatILUFactor_SeqBAIJ" 1164 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info) 1165 { 1166 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1167 Mat outA; 1168 int ierr; 1169 PetscTruth row_identity,col_identity; 1170 1171 PetscFunctionBegin; 1172 if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,0,"Only levels = 0 supported for in-place ILU"); 1173 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1174 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1175 if (!row_identity || !col_identity) { 1176 SETERRQ(1,1,"Row and column permutations must be identity for in-place ILU"); 1177 } 1178 1179 outA = inA; 1180 inA->factor = FACTOR_LU; 1181 1182 if (!a->diag) { 1183 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1184 } 1185 /* 1186 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1187 for ILU(0) factorization with natural ordering 1188 */ 1189 switch (a->bs) { 1190 case 1: 1191 inA->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_2_NaturalOrdering; 1192 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering solvetrans BS=1\n"); 1193 case 2: 1194 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering; 1195 inA->ops->solve = MatSolve_SeqBAIJ_2_NaturalOrdering; 1196 inA->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_2_NaturalOrdering; 1197 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1198 break; 1199 case 3: 1200 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering; 1201 inA->ops->solve = MatSolve_SeqBAIJ_3_NaturalOrdering; 1202 inA->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_3_NaturalOrdering; 1203 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=3\n"); 1204 break; 1205 case 4: 1206 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering; 1207 inA->ops->solve = MatSolve_SeqBAIJ_4_NaturalOrdering; 1208 inA->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_4_NaturalOrdering; 1209 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1210 break; 1211 case 5: 1212 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering; 1213 inA->ops->solve = MatSolve_SeqBAIJ_5_NaturalOrdering; 1214 inA->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_5_NaturalOrdering; 1215 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1216 break; 1217 case 6: 1218 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering; 1219 inA->ops->solve = MatSolve_SeqBAIJ_6_NaturalOrdering; 1220 inA->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_6_NaturalOrdering; 1221 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1222 break; 1223 case 7: 1224 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering; 1225 inA->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_7_NaturalOrdering; 1226 inA->ops->solve = MatSolve_SeqBAIJ_7_NaturalOrdering; 1227 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1228 break; 1229 default: 1230 a->row = row; 1231 a->col = col; 1232 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1233 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1234 1235 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1236 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1237 PLogObjectParent(inA,a->icol); 1238 1239 if (!a->solve_work) { 1240 a->solve_work = (Scalar*)PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 1241 PLogObjectMemory(inA,(a->m+a->bs)*sizeof(Scalar)); 1242 } 1243 } 1244 1245 ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr); 1246 1247 PetscFunctionReturn(0); 1248 } 1249 #undef __FUNC__ 1250 #define __FUNC__ "MatPrintHelp_SeqBAIJ" 1251 int MatPrintHelp_SeqBAIJ(Mat A) 1252 { 1253 static PetscTruth called = PETSC_FALSE; 1254 MPI_Comm comm = A->comm; 1255 int ierr; 1256 1257 PetscFunctionBegin; 1258 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1259 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1260 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1261 PetscFunctionReturn(0); 1262 } 1263 1264 EXTERN_C_BEGIN 1265 #undef __FUNC__ 1266 #define __FUNC__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1267 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices) 1268 { 1269 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1270 int i,nz,n; 1271 1272 PetscFunctionBegin; 1273 nz = baij->maxnz; 1274 n = baij->n; 1275 for (i=0; i<nz; i++) { 1276 baij->j[i] = indices[i]; 1277 } 1278 baij->nz = nz; 1279 for (i=0; i<n; i++) { 1280 baij->ilen[i] = baij->imax[i]; 1281 } 1282 1283 PetscFunctionReturn(0); 1284 } 1285 EXTERN_C_END 1286 1287 #undef __FUNC__ 1288 #define __FUNC__ "MatSeqBAIJSetColumnIndices" 1289 /*@ 1290 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1291 in the matrix. 1292 1293 Input Parameters: 1294 + mat - the SeqBAIJ matrix 1295 - indices - the column indices 1296 1297 Level: advanced 1298 1299 Notes: 1300 This can be called if you have precomputed the nonzero structure of the 1301 matrix and want to provide it to the matrix object to improve the performance 1302 of the MatSetValues() operation. 1303 1304 You MUST have set the correct numbers of nonzeros per row in the call to 1305 MatCreateSeqBAIJ(). 1306 1307 MUST be called before any calls to MatSetValues(); 1308 1309 @*/ 1310 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices) 1311 { 1312 int ierr,(*f)(Mat,int *); 1313 1314 PetscFunctionBegin; 1315 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1316 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void **)&f);CHKERRQ(ierr); 1317 if (f) { 1318 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1319 } else { 1320 SETERRQ(1,1,"Wrong type of matrix to set column indices"); 1321 } 1322 PetscFunctionReturn(0); 1323 } 1324 1325 /* -------------------------------------------------------------------*/ 1326 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 1327 MatGetRow_SeqBAIJ, 1328 MatRestoreRow_SeqBAIJ, 1329 MatMult_SeqBAIJ_N, 1330 MatMultAdd_SeqBAIJ_N, 1331 MatMultTranspose_SeqBAIJ, 1332 MatMultTransposeAdd_SeqBAIJ, 1333 MatSolve_SeqBAIJ_N, 1334 0, 1335 0, 1336 0, 1337 MatLUFactor_SeqBAIJ, 1338 0, 1339 0, 1340 MatTranspose_SeqBAIJ, 1341 MatGetInfo_SeqBAIJ, 1342 MatEqual_SeqBAIJ, 1343 MatGetDiagonal_SeqBAIJ, 1344 MatDiagonalScale_SeqBAIJ, 1345 MatNorm_SeqBAIJ, 1346 0, 1347 MatAssemblyEnd_SeqBAIJ, 1348 0, 1349 MatSetOption_SeqBAIJ, 1350 MatZeroEntries_SeqBAIJ, 1351 MatZeroRows_SeqBAIJ, 1352 MatLUFactorSymbolic_SeqBAIJ, 1353 MatLUFactorNumeric_SeqBAIJ_N, 1354 0, 1355 0, 1356 MatGetSize_SeqBAIJ, 1357 MatGetSize_SeqBAIJ, 1358 MatGetOwnershipRange_SeqBAIJ, 1359 MatILUFactorSymbolic_SeqBAIJ, 1360 0, 1361 0, 1362 0, 1363 MatDuplicate_SeqBAIJ, 1364 0, 1365 0, 1366 MatILUFactor_SeqBAIJ, 1367 0, 1368 0, 1369 MatGetSubMatrices_SeqBAIJ, 1370 MatIncreaseOverlap_SeqBAIJ, 1371 MatGetValues_SeqBAIJ, 1372 0, 1373 MatPrintHelp_SeqBAIJ, 1374 MatScale_SeqBAIJ, 1375 0, 1376 0, 1377 0, 1378 MatGetBlockSize_SeqBAIJ, 1379 MatGetRowIJ_SeqBAIJ, 1380 MatRestoreRowIJ_SeqBAIJ, 1381 0, 1382 0, 1383 0, 1384 0, 1385 0, 1386 0, 1387 MatSetValuesBlocked_SeqBAIJ, 1388 MatGetSubMatrix_SeqBAIJ, 1389 0, 1390 0, 1391 MatGetMaps_Petsc}; 1392 1393 EXTERN_C_BEGIN 1394 #undef __FUNC__ 1395 #define __FUNC__ "MatStoreValues_SeqBAIJ" 1396 int MatStoreValues_SeqBAIJ(Mat mat) 1397 { 1398 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1399 int nz = aij->i[aij->m]*aij->bs*aij->bs2; 1400 int ierr; 1401 1402 PetscFunctionBegin; 1403 if (aij->nonew != 1) { 1404 SETERRQ(1,1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1405 } 1406 1407 /* allocate space for values if not already there */ 1408 if (!aij->saved_values) { 1409 aij->saved_values = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(aij->saved_values); 1410 } 1411 1412 /* copy values over */ 1413 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(Scalar));CHKERRQ(ierr); 1414 PetscFunctionReturn(0); 1415 } 1416 EXTERN_C_END 1417 1418 EXTERN_C_BEGIN 1419 #undef __FUNC__ 1420 #define __FUNC__ "MatRetrieveValues_SeqBAIJ" 1421 int MatRetrieveValues_SeqBAIJ(Mat mat) 1422 { 1423 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1424 int nz = aij->i[aij->m]*aij->bs*aij->bs2,ierr; 1425 1426 PetscFunctionBegin; 1427 if (aij->nonew != 1) { 1428 SETERRQ(1,1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1429 } 1430 if (!aij->saved_values) { 1431 SETERRQ(1,1,"Must call MatStoreValues(A);first"); 1432 } 1433 1434 /* copy values over */ 1435 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(Scalar));CHKERRQ(ierr); 1436 PetscFunctionReturn(0); 1437 } 1438 EXTERN_C_END 1439 1440 #undef __FUNC__ 1441 #define __FUNC__ "MatCreateSeqBAIJ" 1442 /*@C 1443 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1444 compressed row) format. For good matrix assembly performance the 1445 user should preallocate the matrix storage by setting the parameter nz 1446 (or the array nnz). By setting these parameters accurately, performance 1447 during matrix assembly can be increased by more than a factor of 50. 1448 1449 Collective on MPI_Comm 1450 1451 Input Parameters: 1452 + comm - MPI communicator, set to PETSC_COMM_SELF 1453 . bs - size of block 1454 . m - number of rows 1455 . n - number of columns 1456 . nz - number of block nonzeros per block row (same for all rows) 1457 - nnz - array containing the number of block nonzeros in the various block rows 1458 (possibly different for each block row) or PETSC_NULL 1459 1460 Output Parameter: 1461 . A - the matrix 1462 1463 Options Database Keys: 1464 . -mat_no_unroll - uses code that does not unroll the loops in the 1465 block calculations (much slower) 1466 . -mat_block_size - size of the blocks to use 1467 1468 Level: intermediate 1469 1470 Notes: 1471 The block AIJ format is fully compatible with standard Fortran 77 1472 storage. That is, the stored row and column indices can begin at 1473 either one (as in Fortran) or zero. See the users' manual for details. 1474 1475 Specify the preallocated storage with either nz or nnz (not both). 1476 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1477 allocation. For additional details, see the users manual chapter on 1478 matrices. 1479 1480 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1481 @*/ 1482 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A) 1483 { 1484 Mat B; 1485 Mat_SeqBAIJ *b; 1486 int i,len,ierr,mbs,nbs,bs2,size; 1487 PetscTruth flg; 1488 1489 PetscFunctionBegin; 1490 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1491 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"Comm must be of size 1"); 1492 1493 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1494 mbs = m/bs; 1495 nbs = n/bs; 1496 bs2 = bs*bs; 1497 1498 if (mbs*bs!=m || nbs*bs!=n) { 1499 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Number rows, cols must be divisible by blocksize"); 1500 } 1501 1502 if (nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"nz cannot be less than -2: value %d",nz); 1503 if (nnz) { 1504 for (i=0; i<mbs; i++) { 1505 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1506 } 1507 } 1508 1509 *A = 0; 1510 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATSEQBAIJ,"Mat",comm,MatDestroy,MatView); 1511 PLogObjectCreate(B); 1512 B->data = (void*)(b = PetscNew(Mat_SeqBAIJ));CHKPTRQ(b); 1513 ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 1514 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1515 ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1516 if (!flg) { 1517 switch (bs) { 1518 case 1: 1519 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1520 B->ops->solve = MatSolve_SeqBAIJ_1; 1521 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_1; 1522 B->ops->mult = MatMult_SeqBAIJ_1; 1523 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 1524 break; 1525 case 2: 1526 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 1527 B->ops->solve = MatSolve_SeqBAIJ_2; 1528 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_2; 1529 B->ops->mult = MatMult_SeqBAIJ_2; 1530 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 1531 break; 1532 case 3: 1533 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 1534 B->ops->solve = MatSolve_SeqBAIJ_3; 1535 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_3; 1536 B->ops->mult = MatMult_SeqBAIJ_3; 1537 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 1538 break; 1539 case 4: 1540 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 1541 B->ops->solve = MatSolve_SeqBAIJ_4; 1542 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_4; 1543 B->ops->mult = MatMult_SeqBAIJ_4; 1544 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 1545 break; 1546 case 5: 1547 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 1548 B->ops->solve = MatSolve_SeqBAIJ_5; 1549 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_5; 1550 B->ops->mult = MatMult_SeqBAIJ_5; 1551 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 1552 break; 1553 case 6: 1554 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 1555 B->ops->solve = MatSolve_SeqBAIJ_6; 1556 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_6; 1557 B->ops->mult = MatMult_SeqBAIJ_6; 1558 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 1559 break; 1560 case 7: 1561 B->ops->mult = MatMult_SeqBAIJ_7; 1562 B->ops->solve = MatSolve_SeqBAIJ_7; 1563 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_7; 1564 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 1565 break; 1566 } 1567 } 1568 B->ops->destroy = MatDestroy_SeqBAIJ; 1569 B->ops->view = MatView_SeqBAIJ; 1570 B->factor = 0; 1571 B->lupivotthreshold = 1.0; 1572 B->mapping = 0; 1573 b->row = 0; 1574 b->col = 0; 1575 b->icol = 0; 1576 b->reallocs = 0; 1577 b->saved_values = 0; 1578 1579 b->m = m; B->m = m; B->M = m; 1580 b->n = n; B->n = n; B->N = n; 1581 1582 ierr = MapCreateMPI(comm,m,m,&B->rmap);CHKERRQ(ierr); 1583 ierr = MapCreateMPI(comm,n,n,&B->cmap);CHKERRQ(ierr); 1584 1585 b->mbs = mbs; 1586 b->nbs = nbs; 1587 b->imax = (int*)PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(b->imax); 1588 if (!nnz) { 1589 if (nz == PETSC_DEFAULT) nz = 5; 1590 else if (nz <= 0) nz = 1; 1591 for (i=0; i<mbs; i++) b->imax[i] = nz; 1592 nz = nz*mbs; 1593 } else { 1594 nz = 0; 1595 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1596 } 1597 1598 /* allocate the matrix space */ 1599 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (b->m+1)*sizeof(int); 1600 b->a = (MatScalar*)PetscMalloc(len);CHKPTRQ(b->a); 1601 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1602 b->j = (int*)(b->a + nz*bs2); 1603 ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 1604 b->i = b->j + nz; 1605 b->singlemalloc = PETSC_TRUE; 1606 1607 b->i[0] = 0; 1608 for (i=1; i<mbs+1; i++) { 1609 b->i[i] = b->i[i-1] + b->imax[i-1]; 1610 } 1611 1612 /* b->ilen will count nonzeros in each block row so far. */ 1613 b->ilen = (int*)PetscMalloc((mbs+1)*sizeof(int)); 1614 PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1615 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1616 1617 b->bs = bs; 1618 b->bs2 = bs2; 1619 b->mbs = mbs; 1620 b->nz = 0; 1621 b->maxnz = nz*bs2; 1622 b->sorted = PETSC_FALSE; 1623 b->roworiented = PETSC_TRUE; 1624 b->nonew = 0; 1625 b->diag = 0; 1626 b->solve_work = 0; 1627 b->mult_work = 0; 1628 b->spptr = 0; 1629 B->info.nz_unneeded = (PetscReal)b->maxnz; 1630 b->keepzeroedrows = PETSC_FALSE; 1631 1632 *A = B; 1633 ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr); 1634 if (flg) {ierr = MatPrintHelp(B);CHKERRQ(ierr); } 1635 1636 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1637 "MatStoreValues_SeqBAIJ", 1638 (void*)MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 1639 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1640 "MatRetrieveValues_SeqBAIJ", 1641 (void*)MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 1642 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 1643 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 1644 (void*)MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 1645 PetscFunctionReturn(0); 1646 } 1647 1648 #undef __FUNC__ 1649 #define __FUNC__ "MatDuplicate_SeqBAIJ" 1650 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1651 { 1652 Mat C; 1653 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 1654 int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr; 1655 1656 PetscFunctionBegin; 1657 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,0,"Corrupt matrix"); 1658 1659 *B = 0; 1660 PetscHeaderCreate(C,_p_Mat,struct _MatOps,MAT_COOKIE,MATSEQBAIJ,"Mat",A->comm,MatDestroy,MatView); 1661 PLogObjectCreate(C); 1662 C->data = (void*)(c = PetscNew(Mat_SeqBAIJ));CHKPTRQ(c); 1663 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1664 C->ops->destroy = MatDestroy_SeqBAIJ; 1665 C->ops->view = MatView_SeqBAIJ; 1666 C->factor = A->factor; 1667 c->row = 0; 1668 c->col = 0; 1669 c->icol = 0; 1670 c->saved_values = 0; 1671 c->keepzeroedrows = a->keepzeroedrows; 1672 C->assembled = PETSC_TRUE; 1673 1674 c->m = C->m = a->m; 1675 c->n = C->n = a->n; 1676 C->M = a->m; 1677 C->N = a->n; 1678 1679 c->bs = a->bs; 1680 c->bs2 = a->bs2; 1681 c->mbs = a->mbs; 1682 c->nbs = a->nbs; 1683 1684 c->imax = (int*)PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->imax); 1685 c->ilen = (int*)PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->ilen); 1686 for (i=0; i<mbs; i++) { 1687 c->imax[i] = a->imax[i]; 1688 c->ilen[i] = a->ilen[i]; 1689 } 1690 1691 /* allocate the matrix space */ 1692 c->singlemalloc = PETSC_TRUE; 1693 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1694 c->a = (MatScalar*)PetscMalloc(len);CHKPTRQ(c->a); 1695 c->j = (int*)(c->a + nz*bs2); 1696 c->i = c->j + nz; 1697 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1698 if (mbs > 0) { 1699 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1700 if (cpvalues == MAT_COPY_VALUES) { 1701 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1702 } else { 1703 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1704 } 1705 } 1706 1707 PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1708 c->sorted = a->sorted; 1709 c->roworiented = a->roworiented; 1710 c->nonew = a->nonew; 1711 1712 if (a->diag) { 1713 c->diag = (int*)PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->diag); 1714 PLogObjectMemory(C,(mbs+1)*sizeof(int)); 1715 for (i=0; i<mbs; i++) { 1716 c->diag[i] = a->diag[i]; 1717 } 1718 } else c->diag = 0; 1719 c->nz = a->nz; 1720 c->maxnz = a->maxnz; 1721 c->solve_work = 0; 1722 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1723 c->mult_work = 0; 1724 *B = C; 1725 ierr = FListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1726 PetscFunctionReturn(0); 1727 } 1728 1729 #undef __FUNC__ 1730 #define __FUNC__ "MatLoad_SeqBAIJ" 1731 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 1732 { 1733 Mat_SeqBAIJ *a; 1734 Mat B; 1735 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1736 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1737 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1738 int *masked,nmask,tmp,bs2,ishift; 1739 Scalar *aa; 1740 MPI_Comm comm = ((PetscObject)viewer)->comm; 1741 1742 PetscFunctionBegin; 1743 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1744 bs2 = bs*bs; 1745 1746 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1747 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"view must have one processor"); 1748 ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1749 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1750 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not Mat object"); 1751 M = header[1]; N = header[2]; nz = header[3]; 1752 1753 if (header[3] < 0) { 1754 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as SeqBAIJ"); 1755 } 1756 1757 if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices"); 1758 1759 /* 1760 This code adds extra rows to make sure the number of rows is 1761 divisible by the blocksize 1762 */ 1763 mbs = M/bs; 1764 extra_rows = bs - M + bs*(mbs); 1765 if (extra_rows == bs) extra_rows = 0; 1766 else mbs++; 1767 if (extra_rows) { 1768 PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 1769 } 1770 1771 /* read in row lengths */ 1772 rowlengths = (int*)PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 1773 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1774 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1775 1776 /* read in column indices */ 1777 jj = (int*)PetscMalloc((nz+extra_rows)*sizeof(int));CHKPTRQ(jj); 1778 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1779 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1780 1781 /* loop over row lengths determining block row lengths */ 1782 browlengths = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 1783 ierr = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1784 mask = (int*)PetscMalloc(2*mbs*sizeof(int));CHKPTRQ(mask); 1785 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1786 masked = mask + mbs; 1787 rowcount = 0; nzcount = 0; 1788 for (i=0; i<mbs; i++) { 1789 nmask = 0; 1790 for (j=0; j<bs; j++) { 1791 kmax = rowlengths[rowcount]; 1792 for (k=0; k<kmax; k++) { 1793 tmp = jj[nzcount++]/bs; 1794 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1795 } 1796 rowcount++; 1797 } 1798 browlengths[i] += nmask; 1799 /* zero out the mask elements we set */ 1800 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1801 } 1802 1803 /* create our matrix */ 1804 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr); 1805 B = *A; 1806 a = (Mat_SeqBAIJ*)B->data; 1807 1808 /* set matrix "i" values */ 1809 a->i[0] = 0; 1810 for (i=1; i<= mbs; i++) { 1811 a->i[i] = a->i[i-1] + browlengths[i-1]; 1812 a->ilen[i-1] = browlengths[i-1]; 1813 } 1814 a->nz = 0; 1815 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 1816 1817 /* read in nonzero values */ 1818 aa = (Scalar*)PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 1819 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1820 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1821 1822 /* set "a" and "j" values into matrix */ 1823 nzcount = 0; jcount = 0; 1824 for (i=0; i<mbs; i++) { 1825 nzcountb = nzcount; 1826 nmask = 0; 1827 for (j=0; j<bs; j++) { 1828 kmax = rowlengths[i*bs+j]; 1829 for (k=0; k<kmax; k++) { 1830 tmp = jj[nzcount++]/bs; 1831 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1832 } 1833 rowcount++; 1834 } 1835 /* sort the masked values */ 1836 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1837 1838 /* set "j" values into matrix */ 1839 maskcount = 1; 1840 for (j=0; j<nmask; j++) { 1841 a->j[jcount++] = masked[j]; 1842 mask[masked[j]] = maskcount++; 1843 } 1844 /* set "a" values into matrix */ 1845 ishift = bs2*a->i[i]; 1846 for (j=0; j<bs; j++) { 1847 kmax = rowlengths[i*bs+j]; 1848 for (k=0; k<kmax; k++) { 1849 tmp = jj[nzcountb]/bs ; 1850 block = mask[tmp] - 1; 1851 point = jj[nzcountb] - bs*tmp; 1852 idx = ishift + bs2*block + j + bs*point; 1853 a->a[idx] = (MatScalar)aa[nzcountb++]; 1854 } 1855 } 1856 /* zero out the mask elements we set */ 1857 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1858 } 1859 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"Bad binary matrix"); 1860 1861 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1862 ierr = PetscFree(browlengths);CHKERRQ(ierr); 1863 ierr = PetscFree(aa);CHKERRQ(ierr); 1864 ierr = PetscFree(jj);CHKERRQ(ierr); 1865 ierr = PetscFree(mask);CHKERRQ(ierr); 1866 1867 B->assembled = PETSC_TRUE; 1868 1869 ierr = MatView_Private(B);CHKERRQ(ierr); 1870 PetscFunctionReturn(0); 1871 } 1872 1873 1874 1875