Ticket #1076: catch_and_handle_SIGXFSZ_diff.txt

File catch_and_handle_SIGXFSZ_diff.txt, 8.9 KB (added by buzz@…, 20 years ago)
Line 
1Index: libs/libmythtv/RingBuffer.cpp
2===================================================================
3--- libs/libmythtv/RingBuffer.cpp (revision 8639)
4+++ libs/libmythtv/RingBuffer.cpp (working copy)
5@@ -964,9 +964,16 @@
6 pthread_rwlock_rdlock(&rwlock);
7
8 ret = tfw->Write(buf, count);
9- writepos += ret;
10+ if ( ret != -1 ) writepos += ret;
11
12 pthread_rwlock_unlock(&rwlock);
13+
14+ if (ret == -1 ) {
15+ delete tfw ;
16+ tfw = NULL;
17+ VERBOSE(VB_IMPORTANT, LOC_ERR + "Write Failed abnormally!");
18+ }
19+
20 return ret;
21 }
22
23Index: libs/libmythtv/ThreadedFileWriter.cpp
24===================================================================
25--- libs/libmythtv/ThreadedFileWriter.cpp (revision 8639)
26+++ libs/libmythtv/ThreadedFileWriter.cpp (working copy)
27@@ -8,6 +8,7 @@
28 #include <sys/stat.h>
29 #include <unistd.h>
30 #include <fcntl.h>
31+#include <signal.h>
32
33 // MythTV headers
34 #include "ThreadedFileWriter.h"
35@@ -35,23 +36,32 @@
36 * to the stream.
37 */
38
39+int LastSignal = 0;
40+
41 /** \fn safe_write(int, const void*, uint)
42 * \brief Writes data to disk
43 *
44 * This just uses the Standard C write() to write to disk.
45 * We retry forever on EAGAIN errors, and three times on
46 * any other error.
47+ *
48+ * If the OS has signaled a more serious underlying problem (SIGXFSZ,
49+ * file size limit exceeded), we give up immediately and return -1.
50 *
51 * \param fd File descriptor
52 * \param data Pointer to data to write
53 * \param sz Size of data to write in bytes
54 */
55-static uint safe_write(int fd, const void *data, uint sz)
56+static int safe_write(int fd, const void *data, uint sz)
57 {
58 int ret;
59 uint tot = 0;
60 uint errcnt = 0;
61+ extern int LastSignal;
62
63+ // write nothing if system limit reached
64+ if ( LastSignal == SIGXFSZ ) { LastSignal = 0; return -1; }
65+
66 while (tot < sz)
67 {
68 ret = write(fd, (char *)data + tot, sz - tot);
69@@ -90,8 +100,9 @@
70 void *ThreadedFileWriter::boot_writer(void *wotsit)
71 {
72 ThreadedFileWriter *fw = (ThreadedFileWriter *)wotsit;
73- fw->DiskLoop();
74- return NULL;
75+ void *err = (void *)fw->DiskLoop();
76+ // At this point fw has either been destroyed or is about to be.
77+ return err; // so the writer thread can be implicitly pthread_exit'd
78 }
79
80 /** \fn ThreadedFileWriter::boot_syncer(void*)
81@@ -159,14 +170,18 @@
82
83 if (fd >= 0)
84 {
85- Flush();
86- in_dtor = true; /* tells child thread to exit */
87+ /* Flush only if the child threads have not already been told to exit. */
88+ if (in_dtor != true ) {
89+ Flush();
90+ }
91+ in_dtor = true; /* tells child threads to exit, if not already */
92
93- bufferSyncWait.wakeAll();
94- pthread_join(syncer, NULL);
95+ /* Wait until the child threads have exited. */
96+ bufferSyncWait.wakeAll(); //wake sync thread
97+ pthread_join(syncer, NULL); //wait for it to die
98+ bufferHasData.wakeAll(); //wake data thread
99+ pthread_join(writer, NULL); //wait for it to die
100
101- bufferHasData.wakeAll();
102- pthread_join(writer, NULL);
103 close(fd);
104 fd = -1;
105 }
106@@ -182,11 +197,12 @@
107 * \brief Writes data to the end of the write buffer
108 *
109 * NOTE: This blocks while buffer is in use by the write to disk thread.
110+ * NOTE2: returns -1 on write error.
111 *
112 * \param data pointer to data to write to disk
113 * \param count size of data in bytes
114 */
115-uint ThreadedFileWriter::Write(const void *data, uint count)
116+int ThreadedFileWriter::Write(const void *data, uint count)
117 {
118 if (count == 0)
119 return 0;
120@@ -207,6 +223,10 @@
121 if (!first)
122 VERBOSE(VB_IMPORTANT, LOC_ERR + "Write() -- IOBOUND end");
123
124+ // catastrophic write failure:
125+ if (no_writes && in_dtor)
126+ return -1;
127+
128 if (no_writes)
129 return 0;
130
131@@ -266,7 +286,7 @@
132 /** \fn ThreadedFileWriter::Sync(void)
133 * \brief flush data written to the file descriptor to disk.
134 *
135- * NOTE: This doesn't even try flush our queue of data.
136+ * NOTE: This doesn't even try to flush our queue of data.
137 * This only ensures that data which has already been sent
138 * to the kernel for this file is written to disk. This
139 * means that if this backend is writing the data over a
140@@ -335,11 +355,12 @@
141 /** \fn ThreadedFileWriter::DiskLoop(void)
142 * \brief The thread run method that actually calls safe_write().
143 */
144-void ThreadedFileWriter::DiskLoop(void)
145+int ThreadedFileWriter::DiskLoop(void)
146 {
147- uint size = 0, written = 0;
148+ int size = 0;
149+ uint written = 0;
150
151- while (!in_dtor || BufUsed() > 0)
152+ while (!no_writes && (!in_dtor || BufUsed() > 0 ))
153 {
154 size = BufUsed();
155
156@@ -347,7 +368,7 @@
157 bufferEmpty.wakeAll();
158
159 if (!size || (!in_dtor && !flush &&
160- ((size < tfw_min_write_size) &&
161+ (((uint)size < tfw_min_write_size) &&
162 (written >= tfw_min_write_size))))
163 {
164 bufferHasData.wait(100);
165@@ -358,19 +379,22 @@
166 buffer is valid, and we try to write all of it at once which
167 takes a long time. During this time, the other thread fills up
168 the 10% that was free... */
169- size = (size > TFW_MAX_WRITE_SIZE) ? TFW_MAX_WRITE_SIZE : size;
170+ size = ((uint)size > TFW_MAX_WRITE_SIZE) ? TFW_MAX_WRITE_SIZE : size;
171
172 if ((rpos + size) > tfw_buf_size)
173 {
174 int first_chunk_size = tfw_buf_size - rpos;
175 int second_chunk_size = size - first_chunk_size;
176 size = safe_write(fd, buf+rpos, first_chunk_size);
177+ if ( size == -1 ) { no_writes = true; in_dtor = true; }//serious write failure abort now!
178 if ((int)size == first_chunk_size)
179 size += safe_write(fd, buf, second_chunk_size);
180+ if ( size == -1 ) { no_writes = true; in_dtor = true; }//serious write failure abort now
181 }
182 else
183 {
184 size = safe_write(fd, buf+rpos, size);
185+ if ( size == -1 ) { no_writes = true; in_dtor = true; }//serious write failure abort now
186 }
187
188 if (written < tfw_min_write_size)
189@@ -384,6 +408,9 @@
190
191 bufferWroteData.wakeAll();
192 }
193+ // Report a serious write error to the caller, if one occurred.
194+ if ( no_writes == true && in_dtor == true ) return -1;
195+ return 0;
196 }
197
198 /** \fn ThreadedFileWriter::BufUsed(void)
199Index: libs/libmythtv/ThreadedFileWriter.h
200===================================================================
201--- libs/libmythtv/ThreadedFileWriter.h (revision 8639)
202+++ libs/libmythtv/ThreadedFileWriter.h (working copy)
203@@ -7,6 +7,8 @@
204 #include <qwaitcondition.h>
205 #include <qstring.h>
206
207+extern int LastSignal;
208+
209 class ThreadedFileWriter
210 {
211 public:
212@@ -16,7 +18,7 @@
213 bool Open(void);
214
215 long long Seek(long long pos, int whence);
216- uint Write(const void *data, uint count);
217+ int Write(const void *data, uint count);
218
219 void SetWriteBufferSize(uint newSize = TFW_DEF_BUF_SIZE);
220 void SetWriteBufferMinWriteSize(uint newMinSize = TFW_MIN_WRITE_SIZE);
221@@ -29,7 +31,7 @@
222
223 protected:
224 static void *boot_writer(void *);
225- void DiskLoop(void);
226+ int DiskLoop(void);
227
228 static void *boot_syncer(void *);
229 void SyncLoop(void);
230Index: programs/mythbackend/mainserver.h
231===================================================================
232--- programs/mythbackend/mainserver.h (revision 8639)
233+++ programs/mythbackend/mainserver.h (working copy)
234@@ -22,6 +22,8 @@
235 class HttpStatus;
236 class ProcessRequestThread;
237
238+extern int LastSignal;
239+
240 class MainServer : public QObject
241 {
242 Q_OBJECT
243Index: programs/mythbackend/main.cpp
244===================================================================
245--- programs/mythbackend/main.cpp (revision 8639)
246+++ programs/mythbackend/main.cpp (working copy)
247@@ -42,6 +42,7 @@
248 QString lockfile_location;
249 HouseKeeper *housekeeping = NULL;
250 QString logfile = "";
251+//extern int LastSignal;
252
253 bool setupTVs(bool ismaster, bool &error)
254 {
255@@ -182,6 +183,8 @@
256 unlink(lockfile_location.ascii());
257
258 signal(SIGHUP, SIG_DFL);
259+
260+ signal(SIGXFSZ,SIG_DFL);
261 }
262
263 int log_rotate(int report_error)
264@@ -215,7 +218,17 @@
265 log_rotate(0);
266 }
267
268+void file_size_limit_handler(int s)
269+{
270+ extern int LastSignal;
271
272+ // Some implementations reset the handler to the default once a signal is caught, so re-install it:
273+ signal(SIGXFSZ,file_size_limit_handler);
274+ // Record the signal number so that other code (e.g. safe_write) can react to it.
275+ LastSignal = s;
276+}
277+
278+
279 int main(int argc, char **argv)
280 {
281 for(int i = 3; i < sysconf(_SC_OPEN_MAX) - 1; ++i)
282@@ -376,6 +389,9 @@
283
284 if (signal(SIGPIPE, SIG_IGN) == SIG_ERR)
285 cerr << "Unable to ignore SIGPIPE\n";
286+
287+ if (signal(SIGXFSZ, &file_size_limit_handler) == SIG_ERR)
288+ cerr << "Unable to set SIGXFSZ handler. Reaching ulimit/filesize limit will kill backend ungracefully.\n";
289
290 if (daemonize)
291 if (daemon(0, 1) < 0)