Re: [Maria-developers] cc5c19ae523: MDEV-17401: LOAD DATA from very big file into MyISAM table results in EOF error and corrupt index
Hi, Oleksandr! On Oct 12, Oleksandr Byelkin wrote:
revision-id: cc5c19ae5233ba90de086de76043774ae6c78cd7 (mariadb-5.5.61-30-gcc5c19ae523) parent(s): acf8fc1ff8a7b2d49e25279670b04b8eb096ce0c author: Oleksandr Byelkin committer: Oleksandr Byelkin timestamp: 2018-10-12 09:07:05 +0200 message:
MDEV-17401: LOAD DATA from very big file into MyISAM table results in EOF error and corrupt index
my_read fixed as in higher versions. my_pread made as my_read aware of partial read of huge chunks of files MY_FULL_IO enabled for file operations
--- mysys/mf_iocache.c | 4 ++++ mysys/my_pread.c | 25 ++++++++++++++++++------- mysys/my_read.c | 52 ++++++++++++++++++++++++++++------------------------ 3 files changed, 50 insertions(+), 31 deletions(-)
diff --git a/mysys/mf_iocache.c b/mysys/mf_iocache.c index bd71e2ae527..9a051cf6a9e 100644 --- a/mysys/mf_iocache.c +++ b/mysys/mf_iocache.c @@ -282,6 +282,10 @@ int init_io_cache(IO_CACHE *info, File file, size_t cachesize, } info->inited=info->aio_result.pending=0; #endif + if (type == READ_CACHE || type == WRITE_CACHE || type == SEQ_READ_APPEND) + info->myflags|= MY_FULL_IO; + else + info->myflags&= ~MY_FULL_IO;
1. that's a bit redundant, it'd be enough to do it in init_io_cache. but ok. 2. why you didn't remove MY_FULL_IO setting from reinit_io_cache?
DBUG_RETURN(0); } /* init_io_cache */
diff --git a/mysys/my_pread.c b/mysys/my_pread.c index 51b4c5f5599..5cbd6b4316b 100644 --- a/mysys/my_pread.c +++ b/mysys/my_pread.c @@ -47,8 +47,7 @@ size_t my_pread(File Filedes, uchar *Buffer, size_t Count, my_off_t offset, myf MyFlags) { - size_t readbytes; - int error= 0; + size_t readbytes, save_count= 0;
DBUG_ENTER("my_pread");
@@ -66,11 +65,10 @@ size_t my_pread(File Filedes, uchar *Buffer, size_t Count, my_off_t offset, #else readbytes= pread(Filedes, Buffer, Count, offset); #endif - error = (readbytes != Count);
- if (error) + if (readbytes != Count) { - my_errno= errno ? errno : -1; + my_errno= errno; if (errno == 0 || (readbytes != (size_t) -1 && (MyFlags & (MY_NABP | MY_FNABP)))) my_errno= HA_ERR_FILE_TOO_SHORT; @@ -82,6 +80,17 @@ size_t my_pread(File Filedes, uchar *Buffer, size_t Count, my_off_t offset, (int) readbytes)); continue; /* Interrupted */ } + + /* Do a read retry if we didn't get enough data on first read */ + if (readbytes != (size_t) -1 && readbytes != 0 && + (MyFlags & MY_FULL_IO)) + { + Buffer+= readbytes; + Count-= readbytes; + save_count+= readbytes; + continue;
eh, did you test your patch? I don't see how it could work without offset+= readbytes;
+ } + if (MyFlags & (MY_WME | MY_FAE | MY_FNABP)) { if (readbytes == (size_t) -1) @@ -97,8 +106,10 @@ size_t my_pread(File Filedes, uchar *Buffer, size_t Count, my_off_t offset, DBUG_RETURN(MY_FILE_ERROR); /* Return with error */ } if (MyFlags & (MY_NABP | MY_FNABP)) - DBUG_RETURN(0); /* Read went ok; Return 0 */ - DBUG_RETURN(readbytes); /* purecov: inspected */ + readbytes= 0; /* Read went ok; Return 0 */ + else + readbytes+= save_count; + DBUG_RETURN(readbytes); } } /* my_pread */
diff --git a/mysys/my_read.c b/mysys/my_read.c index 883a1c5fdc7..47f2d725f65 100644 --- a/mysys/my_read.c +++ b/mysys/my_read.c @@ -35,17 +35,16 @@
this is copied from 10.1 verbatim, I presume Regards, Sergei Chief Architect MariaDB and security@mariadb.org
Am 18.10.18 um 13:27 schrieb Sergei Golubchik: > Hi, Oleksandr! > > On Oct 12, Oleksandr Byelkin wrote: >> revision-id: cc5c19ae5233ba90de086de76043774ae6c78cd7 (mariadb-5.5.61-30-gcc5c19ae523) >> parent(s): acf8fc1ff8a7b2d49e25279670b04b8eb096ce0c >> author: Oleksandr Byelkin >> committer: Oleksandr Byelkin >> timestamp: 2018-10-12 09:07:05 +0200 >> message: >> >> MDEV-17401: LOAD DATA from very big file into MyISAM table results in EOF error and corrupt index >> >> my_read fixed as in higher versions. >> my_pread made as my_read aware of partial read of huge chunks of files >> MY_FULL_IO enabled for file operations >> >> --- >> mysys/mf_iocache.c | 4 ++++ >> mysys/my_pread.c | 25 ++++++++++++++++++------- >> mysys/my_read.c | 52 ++++++++++++++++++++++++++++------------------------ >> 3 files changed, 50 insertions(+), 31 deletions(-) >> >> diff --git a/mysys/mf_iocache.c b/mysys/mf_iocache.c >> index bd71e2ae527..9a051cf6a9e 100644 >> --- a/mysys/mf_iocache.c >> +++ b/mysys/mf_iocache.c >> @@ -282,6 +282,10 @@ int init_io_cache(IO_CACHE *info, File file, size_t cachesize, >> } >> info->inited=info->aio_result.pending=0; >> #endif >> + if (type == READ_CACHE || type == WRITE_CACHE || type == SEQ_READ_APPEND) >> + info->myflags|= MY_FULL_IO; >> + else >> + info->myflags&= ~MY_FULL_IO; > 1. that's a bit redundant, it'd be enough to do it in init_io_cache. > but ok. > > 2. why you didn't remove MY_FULL_IO setting from reinit_io_cache? it is 5.5 so there is no reinit_io_cache and there is no place where we set MY_FULL_IO (it is not set at all, and this is why it had buggy version for my_read). > >> DBUG_RETURN(0); >> } /* init_io_cache */ >> >> diff --git a/mysys/my_pread.c b/mysys/my_pread.c >> index 51b4c5f5599..5cbd6b4316b 100644 >> --- a/mysys/my_pread.c >> +++ b/mysys/my_pread.c >> @@ -47,8 +47,7 @@ >> size_t my_pread(File Filedes, uchar *Buffer, size_t Count, my_off_t offset, >> myf MyFlags) >> { >> - size_t readbytes; >> - int error= 0; >> + size_t readbytes, save_count= 0; >> >> DBUG_ENTER("my_pread"); >> >> @@ -66,11 +65,10 @@ size_t my_pread(File Filedes, uchar *Buffer, size_t Count, my_off_t offset, >> #else >> readbytes= pread(Filedes, Buffer, Count, offset); >> #endif >> - error = (readbytes != Count); >> >> - if (error) >> + if (readbytes != Count) >> { >> - my_errno= errno ? errno : -1; >> + my_errno= errno; >> if (errno == 0 || (readbytes != (size_t) -1 && >> (MyFlags & (MY_NABP | MY_FNABP)))) >> my_errno= HA_ERR_FILE_TOO_SHORT; >> @@ -82,6 +80,17 @@ size_t my_pread(File Filedes, uchar *Buffer, size_t Count, my_off_t offset, >> (int) readbytes)); >> continue; /* Interrupted */ >> } >> + >> + /* Do a read retry if we didn't get enough data on first read */ >> + if (readbytes != (size_t) -1 && readbytes != 0 && >> + (MyFlags & MY_FULL_IO)) >> + { >> + Buffer+= readbytes; >> + Count-= readbytes; >> + save_count+= readbytes; >> + continue; > eh, did you test your patch? I don't see how it could work without > > offset+= readbytes; it advance buffer pointer (as in correct version of my_read, we can not advance both). > >> + } >> + >> if (MyFlags & (MY_WME | MY_FAE | MY_FNABP)) >> { >> if (readbytes == (size_t) -1) >> @@ -97,8 +106,10 @@ size_t my_pread(File Filedes, uchar *Buffer, size_t Count, my_off_t offset, >> DBUG_RETURN(MY_FILE_ERROR); /* Return with error */ >> } >> if (MyFlags & (MY_NABP | MY_FNABP)) >> - DBUG_RETURN(0); /* Read went ok; Return 0 */ >> - DBUG_RETURN(readbytes); /* purecov: inspected */ >> + readbytes= 0; /* Read went ok; Return 0 */ >> + else >> + readbytes+= save_count; >> + DBUG_RETURN(readbytes); >> } >> } /* my_pread */ >> >> diff --git a/mysys/my_read.c b/mysys/my_read.c >> index 883a1c5fdc7..47f2d725f65 100644 >> --- a/mysys/my_read.c >> +++ b/mysys/my_read.c >> @@ -35,17 +35,16 @@ > this is copied from 10.1 verbatim, I presume Actually 10.3 but they looks like the same. > > Regards, > Sergei > Chief Architect MariaDB > and security@mariadb.org > > _______________________________________________ > Mailing list: https://launchpad.net/~maria-developers > Post to : maria-developers@lists.launchpad.net > Unsubscribe : https://launchpad.net/~maria-developers > More help : https://help.launchpad.net/ListHelp
participants (2)
-
Oleksandr Byelkin
-
Sergei Golubchik