HHH-14211 Switch to using oid for CLOB in PostgreSQL to avoid losing data after vacuumlo

This commit is contained in:
Christian Beikov 2021-11-09 11:43:53 +01:00
parent 9a9653689e
commit 77776a6af1
3 changed files with 109 additions and 41 deletions

View File

@ -114,7 +114,7 @@ public class PostgreSQL81Dialect extends Dialect {
registerColumnType( Types.BINARY, "bytea" );
registerColumnType( Types.LONGVARCHAR, "text" );
registerColumnType( Types.LONGVARBINARY, "bytea" );
registerColumnType( Types.CLOB, "text" );
registerColumnType( Types.CLOB, "oid" );
registerColumnType( Types.BLOB, "oid" );
registerColumnType( Types.NUMERIC, "numeric($p, $s)" );
registerColumnType( Types.OTHER, "uuid" );

View File

@ -65,9 +65,9 @@ public class PostgreSqlLobStringTest extends BaseCoreFunctionalTestCase {
" (?, ?, ?, -1)"
)) {
int index = 1;
statement.setString(index++, value1);
statement.setString(index++, value2);
statement.setString(index++, value3);
statement.setClob( index++, session.getLobHelper().createClob( value1 ) );
statement.setClob( index++, session.getLobHelper().createClob( value2 ) );
statement.setClob( index++, session.getLobHelper().createClob( value3 ) );
assertEquals( 1, statement.executeUpdate() );
}
@ -77,57 +77,63 @@ public class PostgreSqlLobStringTest extends BaseCoreFunctionalTestCase {
@Test
public void testBadClobDataSavedAsStringFails() {
try {
doInHibernate( this::sessionFactory, session -> {
final Query query = session.createQuery( "from TestEntity" );
doInHibernate( this::sessionFactory, session -> {
final Query query = session.createQuery( "from TestEntity" );
final List<TestEntity> results = query.list();
final List<TestEntity> results = query.list();
fail("Exception thrown expected");
} );
}
catch (Exception e) {
Exception rootException = (Exception) ExceptionUtil.rootCause( e );
assertTrue( rootException.getMessage().startsWith( "Bad value for type long" ) );
}
assertThat( results.size(), is( 1 ) );
final TestEntity testEntity = results.get( 0 );
assertThat( testEntity.getFirstLobField(), is( value1 ) );
assertThat( testEntity.getSecondLobField(), is( value2 ) );
final Clob clobField = testEntity.getClobField();
try {
assertThat( clobField.getSubString( 1, (int) clobField.length() ), is( value3 ) );
}
catch (SQLException e) {
fail( e.getMessage() );
}
} );
}
@Test
public void testBadClobDataSavedAsStringworksAfterUpdate() {
doInHibernate( this::sessionFactory, session -> {
session.doWork( connection -> {
try(Statement statement = connection.createStatement()) {
statement.executeUpdate(
"update test_entity\n" +
"set \n" +
" clobfield = lo_from_bytea(0, cast(clobfield as bytea)),\n" +
" firstlobfield = lo_from_bytea(0, cast(firstlobfield as bytea)),\n" +
" secondlobfield = lo_from_bytea(0, cast(secondlobfield as bytea))"
);
}
} );
} );
session.doWork( connection -> {
try (Statement statement = connection.createStatement()) {
statement.executeUpdate(
"update test_entity\n" +
"set \n" +
" clobfield = lo_from_bytea(0, lo_get(clobfield)),\n" +
" firstlobfield = lo_from_bytea(0, lo_get(firstlobfield)),\n" +
" secondlobfield = lo_from_bytea(0, lo_get(secondlobfield))"
);
}
} );
} );
doInHibernate( this::sessionFactory, session -> {
final Query query = session.createQuery( "from TestEntity" );
final Query query = session.createQuery( "from TestEntity" );
final List<TestEntity> results = query.list();
final List<TestEntity> results = query.list();
assertThat( results.size(), is( 1 ) );
assertThat( results.size(), is( 1 ) );
final TestEntity testEntity = results.get( 0 );
assertThat( testEntity.getFirstLobField(), is( value1 ) );
assertThat( testEntity.getSecondLobField(), is( value2 ) );
final Clob clobField = testEntity.getClobField();
try {
final TestEntity testEntity = results.get( 0 );
assertThat( testEntity.getFirstLobField(), is( value1 ) );
assertThat( testEntity.getSecondLobField(), is( value2 ) );
final Clob clobField = testEntity.getClobField();
try {
assertThat( clobField.getSubString( 1, (int) clobField.length() ), is( value3 ) );
}
catch (SQLException e) {
fail( e.getMessage() );
}
} );
assertThat( clobField.getSubString( 1, (int) clobField.length() ), is( value3 ) );
}
catch (SQLException e) {
fail( e.getMessage() );
}
} );
}
@Entity(name = "TestEntity")

View File

@ -19,3 +19,65 @@ configure `hibernate.bytecode.provider=javassist`: remove the property if you're
A side effect is that Hibenate ORM no longer lists javassist among its dependencies.
=== Changes to the DDL type for CLOB in PostgreSQL81Dialect and its subclasses
As of 5.6.2, the default PostgreSQL DDL type for CLOB columns i.e. fields annotated with `@Lob` or with the type `java.sql.Clob`
will be the `oid` type whereas before, the type `text` was used. The `text` type does not support streaming data
and is, even if TOASTed, materialized eagerly by the server, which is not what one would expect for LOB types.
All PostgreSQL JDBC drivers unfortunately just store the `oid` it created for a `java.sql.Clob` into the `text` column.
Although reading back the value with the CLOB API works, PostgreSQL has no knowledge of the reference to the LOB,
because the `oid` is not known to PostgreSQL, leading to data loss when `vacuumlo` (the utility to clean up unused LOBs) runs.
To avoid the data loss, it is required to use the `oid` type so that `vacuumlo` can see the reference.
Updating to 5.6.2 does not require any schema or application changes by default, but we highly recommend
that you migrate existing `text` columns for LOBs to `oid` to prevent data loss due to the activity of `vacuumlo`.
[source,sql]
----
alter table test_entity
alter column clobfield
set data type oid using clobfield::oid
----
If you are overriding the `JdbcTypeDescriptor` for `CLOB` to use e.g. `VarcharTypeDescriptor` in a custom PostgreSQL dialect,
beware that you will also have to override the column type in the custom dialect, as with "pgjdbc",
it is not possible to read/write an `oid` column with the JDBC `ResultSet#getString/Statement#setString` methods.
[source,java]
----
registerColumnType( Types.CLOB, "text" );
----
Alternatively, you can remove the `JdbcTypeDescriptor` override and migrate to `oid` with
[source,sql]
----
alter table test_entity
alter column clobfield
set data type oid using lo_from_bytea(0, cast(clobfield as bytea))
----
The switch to `oid` might have a negative impact on performance for small values that are fetched often,
because the value is stored in a different file system page than the row, even for small values
The benefit of the `oid` type is that it allows streaming the content and reduces the row size.
Users that just want a large text type but don't care about streaming should use the Hibernate type `text`:
[source,java]
----
@Entity
public class TestEntity {
@org.hibernate.annotations.Type( type = "text" )
String clobField;
//...
}
----
This will map to `java.sql.Types.LONGVARCHAR` for which Hibernate dialects register a DDL type that supports access
via the `ResultSet#getString/Statement#setString` methods i.e. in case of PostgreSQL the type `text`.
The `@Lob` annotation should only be used to force the use of the `ResultSet#getClob/Statement#setClob` JDBC driver methods,
which is in turn necessary for streaming data.