From d7bd00fc2b3dddc4207494c39ea61c7637f03b61 Mon Sep 17 00:00:00 2001 From: Greg Alexander Date: Sun, 28 Jul 2019 22:41:03 -0400 Subject: [PATCH 1/4] change SIGSEGV handler to use SA_SIGINFO to get context information, and get a stack dump too --- dropbear/svr-main.c | 79 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 74 insertions(+), 5 deletions(-) diff --git a/dropbear/svr-main.c b/dropbear/svr-main.c index 0a39b70..e24db95 100644 --- a/dropbear/svr-main.c +++ b/dropbear/svr-main.c @@ -30,10 +30,12 @@ #include "runopts.h" #include "dbrandom.h" #include "crypto_desc.h" +#include +#include +#include static size_t listensockets(int *sock, size_t sockcount, int *maxfd); static void sigchld_handler(int dummy); -static void sigsegv_handler(int); static void sigintterm_handler(int fish); #if INETD_MODE static void main_inetd(void); @@ -193,7 +195,9 @@ static void main_noinetd() { } } +fprintf(stderr,"server select in\n"); val = select(maxsock+1, &fds, NULL, NULL, NULL); +fprintf(stderr,"server select out (%d)\n", (int)val); if (ses.exitflag) { unlink(svr_opts.pidfile); @@ -345,7 +349,9 @@ static void sigchld_handler(int UNUSED(unused)) { const int saved_errno = errno; +fprintf(stderr,"sigchld\n"); while(waitpid(-1, NULL, WNOHANG) > 0) {} +fprintf(stderr,"sigchld return from waitpid\n"); sa_chld.sa_handler = sigchld_handler; sa_chld.sa_flags = SA_NOCLDSTOP; @@ -356,10 +362,64 @@ static void sigchld_handler(int UNUSED(unused)) { errno = saved_errno; } +static volatile uintptr_t stack[16]; +static volatile int stack_count; +static _Unwind_Reason_Code unwindCallback(struct _Unwind_Context* context, void* arg) +{ + uintptr_t pc = _Unwind_GetIP(context); + if (pc) { + if (stack_count >= sizeof stack / sizeof stack[0]) { + return _URC_END_OF_STACK; + } else { + stack[stack_count++] = pc; + } + } + return _URC_NO_REASON; +} +static void +dump_sym(uintptr_t addr) +{ + char* symbol = ""; + Dl_info info; + if (dladdr((void*)addr, &info)) { + if (info.dli_sname) { 
+ fprintf(stderr, "%016llX (%s=%016llX)\n", (unsigned long long)addr, info.dli_sname, (unsigned long long)info.dli_saddr); + } else if (info.dli_fname) { + fprintf(stderr, "%016llX (f %s=%016llX)\n", (unsigned long long)addr, info.dli_fname, (unsigned long long)info.dli_fbase); + } else { + fprintf(stderr, "%016llX\n", (unsigned long long)addr); + } + } else { + fprintf(stderr, "%016llX (no dladdr)\n", (unsigned long long)addr); + } +} + +static void backtrace(void) { + int i; + + stack_count = 0; + _Unwind_Backtrace(unwindCallback, NULL); +fprintf(stderr, "stack:\n"); + for (i = 0; i < stack_count; i++) { + dump_sym(stack[i]); + } +} + /* catch any segvs */ -static void sigsegv_handler(int UNUSED(unused)) { +static void sigsegv_handler(int sig, siginfo_t *info, void *ucontext) { + struct sigcontext *ctx = &((ucontext_t *)ucontext)->uc_mcontext; fprintf(stderr, "Aiee, segfault! You should probably report " "this as a bug to the developer\n"); +#if defined(__aarch64__) + fprintf(stderr,"sp=%016llX\n", (unsigned long long)ctx->sp); + fprintf(stderr,"fault_address=%016llX\n", (unsigned long long)ctx->fault_address); + fprintf(stderr, "pc="); + dump_sym(ctx->pc); +{ int i; for (i = 0; i < 31; i++) { fprintf(stderr,"r%d=%016llX\n", i, (unsigned long long)ctx->regs[i]); } } +#else +fprintf(stderr,"not aarch64\n"); +#endif +backtrace(); _exit(EXIT_FAILURE); } @@ -395,9 +455,18 @@ static void commonsetup() { if (sigaction(SIGCHLD, &sa_chld, NULL) < 0) { dropbear_exit("signal() error"); } - if (signal(SIGSEGV, sigsegv_handler) == SIG_ERR) { - dropbear_exit("signal() error"); - } +{ + struct sigaction sa; + + sa.sa_sigaction = sigsegv_handler; + sigemptyset(&sa.sa_mask); + sa.sa_flags = SA_RESTART|SA_SIGINFO; + sigaction(SIGSEGV, &sa, NULL); +} + +// if (signal(SIGSEGV, sigsegv_handler) == SIG_ERR) { +// dropbear_exit("signal() error"); +// } crypto_init(); From 2d8d649cddf0d160ca0ccd52be1f427332c51bad Mon Sep 17 00:00:00 2001 From: Greg Alexander Date: Sun, 4 Aug 2019 
17:39:09 -0400 Subject: [PATCH 2/4] I finally got a dump! --- dump-20190804 | 503 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 503 insertions(+) create mode 100644 dump-20190804 diff --git a/dump-20190804 b/dump-20190804 new file mode 100644 index 0000000..067e965 --- /dev/null +++ b/dump-20190804 @@ -0,0 +1,503 @@ +starting dropbear +[10627] Aug 04 18:18:56 Not backgrounding +server select in +server select out (1) +server select in +[10650] Aug 04 18:19:06 Child connection from 192.168.1.72:59362 +[10650] Aug 04 18:19:06 Pubkey auth succeeded for 'user' with key md5 0f:fb:35:79:cf:36:50:18:c8:0a:6d:8b:7c:be:27:a8 from 192.168.1.72:59362 +server select out (1) +server select in +[10650] Aug 04 18:19:09 Exit (user): Disconnect received +server select out (1) +Aiee, segfault! You should probably report this as a bug to the developer +sp=0000007FED323B00 +fault_address=00000007FFD3454C +pc=00000078DB95C750 (_ZN10A5xContext8HwAddNopEPjj=00000078DB95C710) +r0=00000007FFD34554 +r1=00000007FFD3454C +r2=0000000000000001 +r3=0000000070100000 +r4=0000000070100001 +r5=0000000000000000 +r6=0000000000000001 +r7=00000000000000D8 +r8=00000007FFD34550 +r9=0000000000000001 +r10=0000000000000001 +r11=0000000000000001 +r12=0000000000000001 +r13=0000000000000001 +r14=0000000000000001 +r15=0000000000009669 +r16=0000000000004B34 +r17=0000000000000000 +r18=0000000000000001 +r19=00000078C91A3A00 +r20=0000000000000004 +r21=0000000000000000 +r22=0000000000000000 +r23=00000078C91A3AE0 +r24=00000078DE248AF0 +r25=00000078DB854640 +r26=000000797167C000 +r27=0000000000000018 +r28=0000000000000020 +r29=0000007FED323B20 +r30=00000078DB84FAA4 +stack: +00000078CEA86AF8 (f /data/app/org.galexander.sshd-T8u2XjV_9MO9tmjVSkhk-w==/lib/arm64/libsimplesshd-jni.so=00000078CEA4A000) +00000078CEA869FC (f /data/app/org.galexander.sshd-T8u2XjV_9MO9tmjVSkhk-w==/lib/arm64/libsimplesshd-jni.so=00000078CEA4A000) +00000061A8B6DA90 (f /system/bin/app_process64=00000061A8B6B000) 
+00000079734A47F0 (__kernel_rt_sigreturn=00000079734A47F0) +00000078DB95C750 (_ZN10A5xContext8HwAddNopEPjj=00000078DB95C710) +00000078DB84FAA4 (_ZN9EsxCmdMgr16IssuePendingIB1sE14EsxFlushReasonii=00000078DB84F930) +00000078DB850C3C (_ZN9EsxCmdMgr5FlushE14EsxFlushReason=00000078DB84FCC8) +server select in +00000078DB80EF3C (_ZN10EsxContext7DestroyEv=00000078DB80EF08) +00000078DB81F0D8 (_ZN10EglContext17DestroyEsxContextEv=00000078DB81F090) +00000078DB81EFFC (_ZN10EglContext7DestroyEv=00000078DB81EF60) +00000078DB821CA0 (_ZN10EglDisplay25MarkContextListForDestroyEv=00000078DB821BF0) +[10656] Aug 04 18:19:09 Child connection from 192.168.1.72:59364 +00000078DB82151C (_ZN10EglDisplay9TerminateEi=00000078DB821418) +00000078DB82134C (_ZN14EglDisplayList7DestroyEv=00000078DB8211F8) +00000078DB822838 (_ZN10EglDisplay35DestroyStaticListsMutexesAndTlsKeysEv=00000078DB8227C8) +00000078DB6F2F20 (_Z16EsxEntryDestructv=00000078DB6F2F10) +00000078DB854780 (f /system/vendor/lib64/egl/libGLESv2_adreno.so=00000078DB642000) +sigchld +sigchld return from waitpid +server select out (-1) +server select in +[10656] Aug 04 18:19:09 Pubkey auth succeeded for 'user' with key md5 0f:fb:35:79:cf:36:50:18:c8:0a:6d:8b:7c:be:27:a8 from 192.168.1.72:59364 +server select out (1) +server select in +[10656] Aug 04 18:19:10 Exit (user): Disconnect received +Aiee, segfault! 
You should probably report this as a bug to the developer +sp=0000007FED323B00 +fault_address=00000007FFD3454C +pc=00000078DB95C750 (_ZN10A5xContext8HwAddNopEPjj=00000078DB95C710) +r0=00000007FFD34554 +r1=00000007FFD3454C +r2=0000000000000001 +r3=0000000070100000 +r4=0000000070100001 +r5=0000000000000000 +r6=0000000000000001 +r7=00000000000000D8 +r8=00000007FFD34550 +r9=0000000000000001 +r10=0000000000000001 +r11=0000000000000001 +r12=0000000000000001 +r13=0000000000000001 +r14=0000000000000001 +r15=0000000000009669 +r16=0000000000004B34 +r17=0000000000000000 +r18=0000000000000001 +r19=00000078C91A3A00 +r20=0000000000000004 +r21=0000000000000000 +r22=0000000000000000 +r23=00000078C91A3AE0 +r24=00000078DE248AF0 +r25=00000078DB854640 +r26=000000797167C000 +r27=0000000000000018 +r28=0000000000000020 +r29=0000007FED323B20 +r30=00000078DB84FAA4 +stack: +00000078CEA86AF8 (f /data/app/org.galexander.sshd-T8u2XjV_9MO9tmjVSkhk-w==/lib/arm64/libsimplesshd-jni.so=00000078CEA4A000) +server select out (1) +00000078CEA869FC (f /data/app/org.galexander.sshd-T8u2XjV_9MO9tmjVSkhk-w==/lib/arm64/libsimplesshd-jni.so=00000078CEA4A000) +00000061A8B6DA90 (f /system/bin/app_process64=00000061A8B6B000) +00000079734A47F0 (__kernel_rt_sigreturn=00000079734A47F0) +00000078DB95C750 (_ZN10A5xContext8HwAddNopEPjj=00000078DB95C710) +00000078DB84FAA4 (_ZN9EsxCmdMgr16IssuePendingIB1sE14EsxFlushReasonii=00000078DB84F930) +00000078DB850C3C (_ZN9EsxCmdMgr5FlushE14EsxFlushReason=00000078DB84FCC8) +00000078DB80EF3C (_ZN10EsxContext7DestroyEv=00000078DB80EF08) +00000078DB81F0D8 (_ZN10EglContext17DestroyEsxContextEv=00000078DB81F090) +00000078DB81EFFC (_ZN10EglContext7DestroyEv=00000078DB81EF60) +00000078DB821CA0 (_ZN10EglDisplay25MarkContextListForDestroyEv=00000078DB821BF0) +00000078DB82151C (_ZN10EglDisplay9TerminateEi=00000078DB821418) +00000078DB82134C (_ZN14EglDisplayList7DestroyEv=00000078DB8211F8) +00000078DB822838 (_ZN10EglDisplay35DestroyStaticListsMutexesAndTlsKeysEv=00000078DB8227C8) 
+00000078DB6F2F20 (_Z16EsxEntryDestructv=00000078DB6F2F10) +00000078DB854780 (f /system/vendor/lib64/egl/libGLESv2_adreno.so=00000078DB642000) +server select in +[10659] Aug 04 18:19:10 Child connection from 192.168.1.72:59366 +sigchld +sigchld return from waitpid +server select out (-1) +server select in +[10659] Aug 04 18:19:10 Pubkey auth succeeded for 'user' with key md5 0f:fb:35:79:cf:36:50:18:c8:0a:6d:8b:7c:be:27:a8 from 192.168.1.72:59366 +server select out (1) +server select in +[10659] Aug 04 18:19:11 Exit (user): Disconnect received +server select out (1) +Aiee, segfault! You should probably report this as a bug to the developer +sp=0000007FED323B00 +fault_address=00000007FFD3454C +pc=00000078DB95C750 (_ZN10A5xContext8HwAddNopEPjj=00000078DB95C710) +r0=00000007FFD34554 +r1=00000007FFD3454C +r2=0000000000000001 +r3=0000000070100000 +r4=0000000070100001 +r5=0000000000000000 +r6=0000000000000001 +r7=00000000000000D8 +r8=00000007FFD34550 +r9=0000000000000001 +r10=0000000000000001 +r11=0000000000000001 +r12=0000000000000001 +r13=0000000000000001 +r14=0000000000000001 +r15=0000000000009669 +r16=0000000000004B34 +r17=0000000000000000 +r18=0000000000000001 +r19=00000078C91A3A00 +r20=0000000000000004 +r21=0000000000000000 +r22=0000000000000000 +r23=00000078C91A3AE0 +r24=00000078DE248AF0 +r25=00000078DB854640 +r26=000000797167C000 +r27=0000000000000018 +r28=0000000000000020 +r29=0000007FED323B20 +r30=00000078DB84FAA4 +stack: +00000078CEA86AF8 (f /data/app/org.galexander.sshd-T8u2XjV_9MO9tmjVSkhk-w==/lib/arm64/libsimplesshd-jni.so=00000078CEA4A000) +00000078CEA869FC (f /data/app/org.galexander.sshd-T8u2XjV_9MO9tmjVSkhk-w==/lib/arm64/libsimplesshd-jni.so=00000078CEA4A000) +00000061A8B6DA90 (f /system/bin/app_process64=00000061A8B6B000) +00000079734A47F0 (__kernel_rt_sigreturn=00000079734A47F0) +00000078DB95C750 (_ZN10A5xContext8HwAddNopEPjj=00000078DB95C710) +00000078DB84FAA4 (_ZN9EsxCmdMgr16IssuePendingIB1sE14EsxFlushReasonii=00000078DB84F930) +00000078DB850C3C 
(_ZN9EsxCmdMgr5FlushE14EsxFlushReason=00000078DB84FCC8) +00000078DB80EF3C (_ZN10EsxContext7DestroyEv=00000078DB80EF08) +00000078DB81F0D8 (_ZN10EglContext17DestroyEsxContextEv=00000078DB81F090) +00000078DB81EFFC (_ZN10EglContext7DestroyEv=00000078DB81EF60) +00000078DB821CA0 (_ZN10EglDisplay25MarkContextListForDestroyEv=00000078DB821BF0) +00000078DB82151C (_ZN10EglDisplay9TerminateEi=00000078DB821418) +00000078DB82134C (_ZN14EglDisplayList7DestroyEv=00000078DB8211F8) +00000078DB822838 (_ZN10EglDisplay35DestroyStaticListsMutexesAndTlsKeysEv=00000078DB8227C8) +00000078DB6F2F20 (_Z16EsxEntryDestructv=00000078DB6F2F10) +00000078DB854780 (f /system/vendor/lib64/egl/libGLESv2_adreno.so=00000078DB642000) +server select in +sigchld +sigchld return from waitpid +[10662] Aug 04 18:19:11 Child connection from 192.168.1.72:59368 +[10662] Aug 04 18:19:11 Pubkey auth succeeded for 'user' with key md5 0f:fb:35:79:cf:36:50:18:c8:0a:6d:8b:7c:be:27:a8 from 192.168.1.72:59368 +server select out (1) +server select in +[10662] Aug 04 18:19:11 Exit (user): Disconnect received +server select out (1) +Aiee, segfault! 
You should probably report this as a bug to the developer +sp=0000007FED323B00 +fault_address=00000007FFD3454C +pc=00000078DB95C750 (_ZN10A5xContext8HwAddNopEPjj=00000078DB95C710) +r0=00000007FFD34554 +r1=00000007FFD3454C +r2=0000000000000001 +r3=0000000070100000 +r4=0000000070100001 +r5=0000000000000000 +r6=0000000000000001 +r7=00000000000000D8 +r8=00000007FFD34550 +r9=0000000000000001 +r10=0000000000000001 +r11=0000000000000001 +r12=0000000000000001 +r13=0000000000000001 +r14=0000000000000001 +r15=0000000000009669 +r16=0000000000004B34 +r17=0000000000000000 +r18=0000000000000001 +r19=00000078C91A3A00 +r20=0000000000000004 +r21=0000000000000000 +r22=0000000000000000 +r23=00000078C91A3AE0 +r24=00000078DE248AF0 +r25=00000078DB854640 +r26=000000797167C000 +r27=0000000000000018 +r28=0000000000000020 +r29=0000007FED323B20 +r30=00000078DB84FAA4 +stack: +00000078CEA86AF8 (f /data/app/org.galexander.sshd-T8u2XjV_9MO9tmjVSkhk-w==/lib/arm64/libsimplesshd-jni.so=00000078CEA4A000) +00000078CEA869FC (f /data/app/org.galexander.sshd-T8u2XjV_9MO9tmjVSkhk-w==/lib/arm64/libsimplesshd-jni.so=00000078CEA4A000) +00000061A8B6DA90 (f /system/bin/app_process64=00000061A8B6B000) +00000079734A47F0 (__kernel_rt_sigreturn=00000079734A47F0) +00000078DB95C750 (_ZN10A5xContext8HwAddNopEPjj=00000078DB95C710) +00000078DB84FAA4 (_ZN9EsxCmdMgr16IssuePendingIB1sE14EsxFlushReasonii=00000078DB84F930) +00000078DB850C3C (_ZN9EsxCmdMgr5FlushE14EsxFlushReason=00000078DB84FCC8) +00000078DB80EF3C (_ZN10EsxContext7DestroyEv=00000078DB80EF08) +00000078DB81F0D8 (_ZN10EglContext17DestroyEsxContextEv=00000078DB81F090) +00000078DB81EFFC (_ZN10EglContext7DestroyEv=00000078DB81EF60) +00000078DB821CA0 (_ZN10EglDisplay25MarkContextListForDestroyEv=00000078DB821BF0) +00000078DB82151C (_ZN10EglDisplay9TerminateEi=00000078DB821418) +00000078DB82134C (_ZN14EglDisplayList7DestroyEv=00000078DB8211F8) +00000078DB822838 (_ZN10EglDisplay35DestroyStaticListsMutexesAndTlsKeysEv=00000078DB8227C8) +00000078DB6F2F20 
(_Z16EsxEntryDestructv=00000078DB6F2F10) +00000078DB854780 (f /system/vendor/lib64/egl/libGLESv2_adreno.so=00000078DB642000) +server select in +[10665] Aug 04 18:19:12 Child connection from 192.168.1.72:59370 +sigchld +sigchld return from waitpid +server select out (-1) +server select in +[10665] Aug 04 18:19:12 Pubkey auth succeeded for 'user' with key md5 0f:fb:35:79:cf:36:50:18:c8:0a:6d:8b:7c:be:27:a8 from 192.168.1.72:59370 +server select out (1) +server select in +[10665] Aug 04 18:19:12 Exit (user): Disconnect received +server select out (1) +Aiee, segfault! You should probably report this as a bug to the developer +sp=0000007FED323B00 +fault_address=00000007FFD3454C +pc=00000078DB95C750 (_ZN10A5xContext8HwAddNopEPjj=00000078DB95C710) +r0=00000007FFD34554 +r1=00000007FFD3454C +r2=0000000000000001 +r3=0000000070100000 +r4=0000000070100001 +r5=0000000000000000 +r6=0000000000000001 +r7=00000000000000D8 +r8=00000007FFD34550 +r9=0000000000000001 +r10=0000000000000001 +r11=0000000000000001 +r12=0000000000000001 +r13=0000000000000001 +r14=0000000000000001 +r15=0000000000009669 +r16=0000000000004B34 +r17=0000000000000000 +r18=0000000000000001 +r19=00000078C91A3A00 +r20=0000000000000004 +r21=0000000000000000 +r22=0000000000000000 +r23=00000078C91A3AE0 +r24=00000078DE248AF0 +r25=00000078DB854640 +r26=000000797167C000 +r27=0000000000000018 +r28=0000000000000020 +r29=0000007FED323B20 +r30=00000078DB84FAA4 +stack: +00000078CEA86AF8 (f /data/app/org.galexander.sshd-T8u2XjV_9MO9tmjVSkhk-w==/lib/arm64/libsimplesshd-jni.so=00000078CEA4A000) +00000078CEA869FC (f /data/app/org.galexander.sshd-T8u2XjV_9MO9tmjVSkhk-w==/lib/arm64/libsimplesshd-jni.so=00000078CEA4A000) +00000061A8B6DA90 (f /system/bin/app_process64=00000061A8B6B000) +00000079734A47F0 (__kernel_rt_sigreturn=00000079734A47F0) +00000078DB95C750 (_ZN10A5xContext8HwAddNopEPjj=00000078DB95C710) +00000078DB84FAA4 (_ZN9EsxCmdMgr16IssuePendingIB1sE14EsxFlushReasonii=00000078DB84F930) +00000078DB850C3C 
(_ZN9EsxCmdMgr5FlushE14EsxFlushReason=00000078DB84FCC8) +00000078DB80EF3C (_ZN10EsxContext7DestroyEv=00000078DB80EF08) +00000078DB81F0D8 (_ZN10EglContext17DestroyEsxContextEv=00000078DB81F090) +00000078DB81EFFC (_ZN10EglContext7DestroyEv=00000078DB81EF60) +00000078DB821CA0 (_ZN10EglDisplay25MarkContextListForDestroyEv=00000078DB821BF0) +00000078DB82151C (_ZN10EglDisplay9TerminateEi=00000078DB821418) +00000078DB82134C (_ZN14EglDisplayList7DestroyEv=00000078DB8211F8) +00000078DB822838 (_ZN10EglDisplay35DestroyStaticListsMutexesAndTlsKeysEv=00000078DB8227C8) +00000078DB6F2F20 (_Z16EsxEntryDestructv=00000078DB6F2F10) +00000078DB854780 (f /system/vendor/lib64/egl/libGLESv2_adreno.so=00000078DB642000) +sigchld +sigchld return from waitpid +server select in +[10669] Aug 04 18:19:12 Child connection from 192.168.1.72:59372 +[10669] Aug 04 18:19:12 Pubkey auth succeeded for 'user' with key md5 0f:fb:35:79:cf:36:50:18:c8:0a:6d:8b:7c:be:27:a8 from 192.168.1.72:59372 +server select out (1) +server select in +[10669] Aug 04 18:19:12 Exit (user): Disconnect received +server select out (1) +Aiee, segfault! 
You should probably report this as a bug to the developer +sp=0000007FED323B00 +fault_address=00000007FFD3454C +pc=00000078DB95C750 (_ZN10A5xContext8HwAddNopEPjj=00000078DB95C710) +r0=00000007FFD34554 +r1=00000007FFD3454C +r2=0000000000000001 +r3=0000000070100000 +r4=0000000070100001 +r5=0000000000000000 +r6=0000000000000001 +r7=00000000000000D8 +r8=00000007FFD34550 +r9=0000000000000001 +r10=0000000000000001 +r11=0000000000000001 +r12=0000000000000001 +r13=0000000000000001 +r14=0000000000000001 +r15=0000000000009669 +r16=0000000000004B34 +r17=0000000000000000 +r18=0000000000000001 +r19=00000078C91A3A00 +r20=0000000000000004 +r21=0000000000000000 +r22=0000000000000000 +r23=00000078C91A3AE0 +r24=00000078DE248AF0 +r25=00000078DB854640 +r26=000000797167C000 +r27=0000000000000018 +r28=0000000000000020 +r29=0000007FED323B20 +r30=00000078DB84FAA4 +stack: +00000078CEA86AF8 (f /data/app/org.galexander.sshd-T8u2XjV_9MO9tmjVSkhk-w==/lib/arm64/libsimplesshd-jni.so=00000078CEA4A000) +00000078CEA869FC (f /data/app/org.galexander.sshd-T8u2XjV_9MO9tmjVSkhk-w==/lib/arm64/libsimplesshd-jni.so=00000078CEA4A000) +00000061A8B6DA90 (f /system/bin/app_process64=00000061A8B6B000) +00000079734A47F0 (__kernel_rt_sigreturn=00000079734A47F0) +00000078DB95C750 (_ZN10A5xContext8HwAddNopEPjj=00000078DB95C710) +00000078DB84FAA4 (_ZN9EsxCmdMgr16IssuePendingIB1sE14EsxFlushReasonii=00000078DB84F930) +00000078DB850C3C (_ZN9EsxCmdMgr5FlushE14EsxFlushReason=00000078DB84FCC8) +00000078DB80EF3C (_ZN10EsxContext7DestroyEv=00000078DB80EF08) +00000078DB81F0D8 (_ZN10EglContext17DestroyEsxContextEv=00000078DB81F090) +00000078DB81EFFC (_ZN10EglContext7DestroyEv=00000078DB81EF60) +00000078DB821CA0 (_ZN10EglDisplay25MarkContextListForDestroyEv=00000078DB821BF0) +00000078DB82151C (_ZN10EglDisplay9TerminateEi=00000078DB821418) +00000078DB82134C (_ZN14EglDisplayList7DestroyEv=00000078DB8211F8) +00000078DB822838 (_ZN10EglDisplay35DestroyStaticListsMutexesAndTlsKeysEv=00000078DB8227C8) +00000078DB6F2F20 
(_Z16EsxEntryDestructv=00000078DB6F2F10) +00000078DB854780 (f /system/vendor/lib64/egl/libGLESv2_adreno.so=00000078DB642000) +server select in +[10672] Aug 04 18:19:12 Child connection from 192.168.1.72:59374 +sigchld +sigchld return from waitpid +server select out (-1) +server select in +[10672] Aug 04 18:19:12 Pubkey auth succeeded for 'user' with key md5 0f:fb:35:79:cf:36:50:18:c8:0a:6d:8b:7c:be:27:a8 from 192.168.1.72:59374 +server select out (1) +server select in +[10672] Aug 04 18:19:13 Exit (user): Disconnect received +server select out (1) +Aiee, segfault! You should probably report this as a bug to the developer +sp=0000007FED323B00 +fault_address=00000007FFD3454C +pc=00000078DB95C750 (_ZN10A5xContext8HwAddNopEPjj=00000078DB95C710) +r0=00000007FFD34554 +r1=00000007FFD3454C +r2=0000000000000001 +r3=0000000070100000 +r4=0000000070100001 +r5=0000000000000000 +r6=0000000000000001 +r7=00000000000000D8 +r8=00000007FFD34550 +r9=0000000000000001 +r10=0000000000000001 +r11=0000000000000001 +r12=0000000000000001 +r13=0000000000000001 +r14=0000000000000001 +r15=0000000000009669 +r16=0000000000004B34 +r17=0000000000000000 +r18=0000000000000001 +r19=00000078C91A3A00 +r20=0000000000000004 +r21=0000000000000000 +r22=0000000000000000 +r23=00000078C91A3AE0 +r24=00000078DE248AF0 +r25=00000078DB854640 +r26=000000797167C000 +r27=0000000000000018 +r28=0000000000000020 +r29=0000007FED323B20 +r30=00000078DB84FAA4 +stack: +00000078CEA86AF8 (f /data/app/org.galexander.sshd-T8u2XjV_9MO9tmjVSkhk-w==/lib/arm64/libsimplesshd-jni.so=00000078CEA4A000) +00000078CEA869FC (f /data/app/org.galexander.sshd-T8u2XjV_9MO9tmjVSkhk-w==/lib/arm64/libsimplesshd-jni.so=00000078CEA4A000) +00000061A8B6DA90 (f /system/bin/app_process64=00000061A8B6B000) +00000079734A47F0 (__kernel_rt_sigreturn=00000079734A47F0) +00000078DB95C750 (_ZN10A5xContext8HwAddNopEPjj=00000078DB95C710) +00000078DB84FAA4 (_ZN9EsxCmdMgr16IssuePendingIB1sE14EsxFlushReasonii=00000078DB84F930) +00000078DB850C3C 
(_ZN9EsxCmdMgr5FlushE14EsxFlushReason=00000078DB84FCC8) +00000078DB80EF3C (_ZN10EsxContext7DestroyEv=00000078DB80EF08) +00000078DB81F0D8 (_ZN10EglContext17DestroyEsxContextEv=00000078DB81F090) +00000078DB81EFFC (_ZN10EglContext7DestroyEv=00000078DB81EF60) +00000078DB821CA0 (_ZN10EglDisplay25MarkContextListForDestroyEv=00000078DB821BF0) +00000078DB82151C (_ZN10EglDisplay9TerminateEi=00000078DB821418) +00000078DB82134C (_ZN14EglDisplayList7DestroyEv=00000078DB8211F8) +00000078DB822838 (_ZN10EglDisplay35DestroyStaticListsMutexesAndTlsKeysEv=00000078DB8227C8) +00000078DB6F2F20 (_Z16EsxEntryDestructv=00000078DB6F2F10) +00000078DB854780 (f /system/vendor/lib64/egl/libGLESv2_adreno.so=00000078DB642000) +server select in +[10675] Aug 04 18:19:13 Child connection from 192.168.1.72:59376 +sigchld +sigchld return from waitpid +server select out (-1) +server select in +[10675] Aug 04 18:19:13 Pubkey auth succeeded for 'user' with key md5 0f:fb:35:79:cf:36:50:18:c8:0a:6d:8b:7c:be:27:a8 from 192.168.1.72:59376 +server select out (1) +server select in +[10675] Aug 04 18:19:13 Exit (user): Disconnect received +Aiee, segfault! 
You should probably report this as a bug to the developer +sp=0000007FED323B00 +fault_address=00000007FFD3454C +pc=00000078DB95C750 (_ZN10A5xContext8HwAddNopEPjj=00000078DB95C710) +r0=00000007FFD34554 +r1=00000007FFD3454C +r2=0000000000000001 +r3=0000000070100000 +r4=0000000070100001 +r5=0000000000000000 +r6=0000000000000001 +r7=00000000000000D8 +r8=00000007FFD34550 +r9=0000000000000001 +r10=0000000000000001 +r11=0000000000000001 +r12=0000000000000001 +r13=0000000000000001 +r14=0000000000000001 +r15=0000000000009669 +r16=0000000000004B34 +r17=0000000000000000 +r18=0000000000000001 +r19=00000078C91A3A00 +r20=0000000000000004 +r21=0000000000000000 +r22=0000000000000000 +r23=00000078C91A3AE0 +r24=00000078DE248AF0 +r25=00000078DB854640 +r26=000000797167C000 +r27=0000000000000018 +r28=0000000000000020 +r29=0000007FED323B20 +r30=00000078DB84FAA4 +stack: +00000078CEA86AF8 (f /data/app/org.galexander.sshd-T8u2XjV_9MO9tmjVSkhk-w==/lib/arm64/libsimplesshd-jni.so=00000078CEA4A000) +00000078CEA869FC (f /data/app/org.galexander.sshd-T8u2XjV_9MO9tmjVSkhk-w==/lib/arm64/libsimplesshd-jni.so=00000078CEA4A000) +00000061A8B6DA90 (f /system/bin/app_process64=00000061A8B6B000) +00000079734A47F0 (__kernel_rt_sigreturn=00000079734A47F0) +00000078DB95C750 (_ZN10A5xContext8HwAddNopEPjj=00000078DB95C710) +00000078DB84FAA4 (_ZN9EsxCmdMgr16IssuePendingIB1sE14EsxFlushReasonii=00000078DB84F930) +00000078DB850C3C (_ZN9EsxCmdMgr5FlushE14EsxFlushReason=00000078DB84FCC8) +00000078DB80EF3C (_ZN10EsxContext7DestroyEv=00000078DB80EF08) +00000078DB81F0D8 (_ZN10EglContext17DestroyEsxContextEv=00000078DB81F090) +00000078DB81EFFC (_ZN10EglContext7DestroyEv=00000078DB81EF60) +00000078DB821CA0 (_ZN10EglDisplay25MarkContextListForDestroyEv=00000078DB821BF0) +00000078DB82151C (_ZN10EglDisplay9TerminateEi=00000078DB821418) +00000078DB82134C (_ZN14EglDisplayList7DestroyEv=00000078DB8211F8) +00000078DB822838 (_ZN10EglDisplay35DestroyStaticListsMutexesAndTlsKeysEv=00000078DB8227C8) +00000078DB6F2F20 
(_Z16EsxEntryDestructv=00000078DB6F2F10) +00000078DB854780 (f /system/vendor/lib64/egl/libGLESv2_adreno.so=00000078DB642000) +sigchld +sigchld return from waitpid +server select out (-1) +server select in From e204c1ea7451851baa44cbba1770ad787176a648 Mon Sep 17 00:00:00 2001 From: Greg Alexander Date: Mon, 5 Aug 2019 11:15:05 -0400 Subject: [PATCH 3/4] musings about the dump, which must be caused by atexit() --- NOTES | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/NOTES b/NOTES index 847ab97..ba524f8 100644 --- a/NOTES +++ b/NOTES @@ -898,6 +898,75 @@ the manifest... allowBackup="false" took immediate effect and had no surprises... +August 4, 2019. + +I finally got a dump from a user (Hammad), and it's quite distressing. +The stack trace is roughly: + backtrace() + sigsegv_handler() + /system/bin/app_process64+0x2a90 + __kernel_rt_sigreturn() + A5xContext::HwAddNop(unsigned int *, unsigned int) + EsxCmdMgr::IssuePendingIB1s(EsxFlushReason, int, int) + EsxCmdMgr::Flush(EsxFlushReason) + EsxContext::Destroy() + EglContext::DestroyEsxContext() + EglDisplay::MarkContextListForDestroy() + EglDisplay::Terminate(int) + EglDisplayList::Destroy() + EglDisplay::DestroyStaticListsMutexesAndTlsKeys() + EsxEntryDestruct() + /system/vendor/lib64/egl/libGLESv2_adreno.so+0x212780 + [... cut off at 16 ...] + +So many questions! I think app_process64 must be the actual C main() of +a process, responsible for branching into all the Android system +libraries? I imagine it's involved because it's somehow intercepted the +SIGSEGV and re-dispatched it to my handler? I don't see any way we could +have branched into libGLESv2_adreno from userland, so the SIGSEGV must +come from the UI thread, I guess? Maybe this SIGSEGV is actually the +sort of thing we'd get if we tried to call UI code from the non-UI +thread?? + +It looks like GLES is busy cleaning itself up, and it crashes. Why's it +crash? 
Why's it trying to clean itself up? + +Hammad says there is no problem using sshd... I thought he meant that the +re-start logic is working for him, but his dropbear.err has multiple dumps +in it! The SIGSEGVs are apparently not killing the daemon. + +There are no timestamps on the dumps, but it looks like they're +associated with activity anyway. Each dump happens between "Disconnect +received" and "sigchld". Some of them have "server select out" +interleaved into the dump, which I think is the result of Hammad running: + while true; do ssh phone 'exit'; done +That is, it appears he starts a new connection the very instant the old +connection ends. So the new connection comes into the server process +while the child process is in the act of dying. + +The thing is, I don't see how it could possibly be getting signals from +the Java side of things, because it fork()s before setting up the signal +handling. It's not just running in a different thread; it should be a +totally separate process. I can test this, but I don't think I'm wrong +about that. + +So I guess just about the only thing that's really possible is that +there's an atexit() handler which survives the fork() because the fork() isn't followed +up with an execve(). It's not caused by ARM, or even necessarily by +Android 9... the reason it doesn't show up in the emulator is that the +libGLES that registers the atexit() is vendor-supplied for specific +hardware ("Adreno"). + +So I need to figure out how to bypass the atexit() somehow, perhaps by +calling _exit() directly? 
+ + +XXX - merge back into main branch, because I'll want to keep the dump facility +XXX - make the dump go deeper in the stack +XXX - put a crash in an atexit() to be sure it presents about this way +XXX - test re-start mechanism, which doesn't seem to work on the first try if it crashes +XXX - test bypassing that crash +XXX - remove the crash, remove the debug fprintfs (select in/out, sigchld) --- new release From 967a1e42a985630dc34d922fcecf5a6e1b2aeda7 Mon Sep 17 00:00:00 2001 From: Greg Alexander Date: Mon, 5 Aug 2019 11:41:32 -0400 Subject: [PATCH 4/4] probable technique for bypassing atexit --- NOTES | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NOTES b/NOTES index ba524f8..07af3fa 100644 --- a/NOTES +++ b/NOTES @@ -965,7 +965,7 @@ XXX - merge back into main branch, because I'll want to keep the dump facility XXX - make the dump go deeper in the stack XXX - put a crash in an atexit() to be sure it presents about this way XXX - test re-start mechanism, which doesn't seem to work on the first try if it crashes -XXX - test bypassing that crash +XXX - test bypassing that crash (register an atexit() just after the fork that calls _Exit(0)) XXX - remove the crash, remove the debug fprintfs (select in/out, sigchld) --- new release