// Released under the MIT License. See LICENSE for details. // An ultra-simple client app to forward commands to a pcommand server. This // lets us run *lots* of small pcommands very fast. Often the limiting // factor in such cases is the startup time of Python which this mostly // eliminates. See tools/efrotools/pcommandbatch.py for more info. #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "cJSON.h" struct Context_ { const char* state_dir_path; const char* project_dir_path; const char* instance_prefix; int instance_num; int pid; int verbose; int debug; int server_idle_seconds; const char* pcommandpath; int sockfd; }; int path_exists_(const char* path); int establish_connection_(struct Context_* ctx); int calc_paths_(struct Context_* ctx); int send_command_(struct Context_* ctx, int argc, char** argv); int handle_response_(struct Context_* ctx); // Read all data from a socket and return as a malloc'ed null-terminated // string. char* read_string_from_socket_(const struct Context_* ctx); // Tear down context (closing socket, etc.) before closing app. void tear_down_context_(struct Context_* ctx); // If a valid state file is present at the provided path and not older than // server_idle_seconds, return said port as an int. Otherwise return -1; int get_running_server_port_(const struct Context_* ctx, const char* state_file_path_full); int main(int argc, char** argv) { struct Context_ ctx; memset(&ctx, 0, sizeof(ctx)); ctx.server_idle_seconds = 5; ctx.pid = getpid(); ctx.sockfd = -1; // Verbose mode enables more printing here. Debug mode enables that plus // extra stuff. The extra stuff is mostly the server side though. { const char* debug_env = getenv("BA_PCOMMANDBATCH_DEBUG"); ctx.debug = debug_env && !strcmp(debug_env, "1"); const char* verbose_env = getenv("BA_PCOMMANDBATCH_VERBOSE"); ctx.verbose = ctx.debug || (verbose_env && !strcmp(verbose_env, "1")); } // Seed rand() using the current time in microseconds. struct timeval tv; gettimeofday(&tv, NULL); unsigned int seed = tv.tv_usec; srand(seed); // Figure our which file path we'll use to get server state. if (calc_paths_(&ctx) != 0) { tear_down_context_(&ctx); return 1; } // Establish communication with said server (spinning it up if needed). ctx.sockfd = establish_connection_(&ctx); if (ctx.sockfd == -1) { tear_down_context_(&ctx); return 1; } if (send_command_(&ctx, argc, argv) != 0) { tear_down_context_(&ctx); return 1; } int result_val = handle_response_(&ctx); if (result_val != 0) { tear_down_context_(&ctx); return 1; } tear_down_context_(&ctx); return result_val; } void tear_down_context_(struct Context_* ctx) { if (ctx->sockfd != -1) { if (close(ctx->sockfd) != 0) { fprintf(stderr, "Error: pcommandbatch client %s_%d (pid %d): error %d closing " "socket.\n", ctx->instance_prefix, ctx->instance_num, ctx->pid, errno); } } } int get_running_server_port_(const struct Context_* ctx, const char* state_file_path_full) { struct stat file_stat; time_t current_time = time(NULL); if (current_time == -1) { perror("time"); return -1; } int fd = open(state_file_path_full, O_RDONLY); if (fd < 0) { return -1; } if (fstat(fd, &file_stat) == -1) { close(fd); return -1; } int age_seconds = current_time - file_stat.st_mtime; if (ctx->verbose) { if (age_seconds <= ctx->server_idle_seconds) { fprintf(stderr, "pcommandbatch client %s_%d (pid %d) found state file with age " "%d at " "time %ld.\n", ctx->instance_prefix, ctx->instance_num, ctx->pid, age_seconds, time(NULL)); } } if (age_seconds > ctx->server_idle_seconds) { close(fd); return -1; } else if (age_seconds < 0) { fprintf(stderr, "pcommandbatch got negative age; unexpected."); } char buf[256]; ssize_t amt = read(fd, buf, sizeof(buf) - 1); close(fd); if (amt == -1 || amt == sizeof(buf) - 1) { return -1; } buf[amt] = 0; // Null-terminate it. cJSON* state_dict = cJSON_Parse(buf); if (!state_dict) { // An un-parseable state file is not a recoverable error; go down hard. // State files are written and then moved into place so we should never // see something like a half-written file. fprintf(stderr, "Fatal Error: pcommandbatch client %s_%d (pid %d):" " failed to parse state file value of size %zu.\n", ctx->instance_prefix, ctx->instance_num, ctx->pid, amt); fflush(stderr); abort(); // return -1; } // If results included output, print it. cJSON* port_obj = cJSON_GetObjectItem(state_dict, "p"); if (!port_obj || !cJSON_IsNumber(port_obj)) { fprintf(stderr, "Error: pcommandbatch client %s_%d (pid %d): failed to get port " "value from state.\n", ctx->instance_prefix, ctx->instance_num, ctx->pid); cJSON_Delete(state_dict); return -1; } int port = cJSON_GetNumberValue(port_obj); cJSON_Delete(state_dict); return port; } int path_exists_(const char* path) { struct stat file_stat; return (stat(path, &file_stat) != -1); } int establish_connection_(struct Context_* ctx) { char state_file_path_full[256]; snprintf(state_file_path_full, sizeof(state_file_path_full), "%s/worker_state_%s_%d", ctx->state_dir_path, ctx->instance_prefix, ctx->instance_num); int sockfd = 0; if ((sockfd = socket(AF_INET, SOCK_STREAM, 0)) < 0) { fprintf(stderr, "Error: pcommandbatch client %s_%d (pid %d): could not create " "socket.\n", ctx->instance_prefix, ctx->instance_num, ctx->pid); return -1; } // On Mac I'm running into EADDRNOTAVAIL errors if I spit out too many // requests in a short enough period of time. I'm guessing its exhausting // free ports when cooldown time is taken into account. Sleeping and // trying again in a moment seems to work. int retry_attempt = 0; int retry_sleep_secs = 1; while (1) { // First look for an already-running batch server. int port = get_running_server_port_(ctx, state_file_path_full); if (port == -1) { // Ok; no running server. Spin one up. if (ctx->verbose) { fprintf(stderr, "pcommandbatch client %s_%d (pid %d) requesting batch server " "spinup...\n", ctx->instance_prefix, ctx->instance_num, ctx->pid); } // In non-debug-mode, route to a log file. char endbuf[1024]; if (ctx->debug) { snprintf(endbuf, sizeof(endbuf), " &"); } else { snprintf(endbuf, sizeof(endbuf), " >>%s/worker_log_%s_%d 2>&1 &", ctx->state_dir_path, ctx->instance_prefix, ctx->instance_num); } char buf[2048]; snprintf(buf, sizeof(buf), "%s batchserver --timeout %d --project-dir %s" " --instance %s_%d %s", ctx->pcommandpath, ctx->server_idle_seconds, ctx->project_dir_path, ctx->instance_prefix, ctx->instance_num, endbuf); system(buf); // Spin and wait up to a few seconds for the file to appear. time_t start_time = time(NULL); int cycles = 0; while (time(NULL) - start_time < 5) { port = get_running_server_port_(ctx, state_file_path_full); if (port != -1) { break; } usleep(10000); cycles += 1; } if (ctx->verbose) { fprintf(stderr, "pcommandbatch client %s_%d (pid %d) waited %d" " cycles for state file to appear at '%s'.\n", ctx->instance_prefix, ctx->instance_num, ctx->pid, cycles, state_file_path_full); } if (port == -1) { // We failed but we can retry. if (ctx->verbose) { fprintf(stderr, "Error: pcommandbatch client %s_%d (pid %d): failed to open " "server on attempt %d.\n", ctx->instance_prefix, ctx->instance_num, ctx->pid, retry_attempt); } } } // Ok we got a port; now try to connect to it. if (port != -1) { if (ctx->verbose) { fprintf(stderr, "pcommandbatch client %s_%d (pid %d) will use server on port " "%d at " "time %ld.\n", ctx->instance_prefix, ctx->instance_num, ctx->pid, port, time(NULL)); } struct sockaddr_in serv_addr; memset(&serv_addr, '0', sizeof(serv_addr)); serv_addr.sin_family = AF_INET; serv_addr.sin_port = htons(port); serv_addr.sin_addr.s_addr = inet_addr("127.0.0.1"); int cresult = connect(sockfd, (struct sockaddr*)&serv_addr, sizeof(serv_addr)); if (cresult == 0) { break; } else if (errno == EADDRNOTAVAIL) { // Seems we can get this if blasting the machine with enough // commands that they run out of ports for us to use. The situation // should resolve itself if we wait/retry a few times. if (ctx->verbose) { fprintf(stderr, "pcommandbatch client %s_%d (pid %d): got EADDRNOTAVAIL" " on connect attempt %d.\n", ctx->instance_prefix, ctx->instance_num, ctx->pid, retry_attempt + 1); } } else if (errno == ECONNREFUSED) { // Am seeing these very rarely on random one-off commands. I'm // guessing there's some race condition at the OS level where the // port-file write goes through before the socket is actually truly // accepting connections. A retry should succeed. if (ctx->verbose) { fprintf(stderr, "pcommandbatch client %s_%d (pid %d): got ECONNREFUSED" " on connect attempt %d.\n", ctx->instance_prefix, ctx->instance_num, ctx->pid, retry_attempt + 1); } } else if (errno == EINVAL) { // Saw this randomly once on Mac. Not sure what could have led to it. if (ctx->verbose) { fprintf(stderr, "pcommandbatch client %s_%d (pid %d): got EINVAL" " on connect attempt %d.\n", ctx->instance_prefix, ctx->instance_num, ctx->pid, retry_attempt + 1); } } else { // Currently not retrying on other errors. fprintf(stderr, "Error: pcommandbatch client %s_%d (pid %d): connect failed " "(errno " "%d).\n", ctx->instance_prefix, ctx->instance_num, ctx->pid, errno); close(sockfd); return -1; } } // Let's stop at 5, which will be about a minute of waiting total. if (retry_attempt >= 5) { fprintf(stderr, "Error: pcommandbatch client %s_%d (pid %d): too many " "retry attempts; giving up.\n", ctx->instance_prefix, ctx->instance_num, ctx->pid); close(sockfd); return -1; } // Am currently seeing the occasional hang in this loop. Let's flip // into verbose if that might be happening to diagnose. ctx->verbose = 1; if (ctx->verbose) { fprintf( stderr, "pcommandbatch client %s_%d (pid %d) connection attempt %d failed;" " will sleep %d secs and try again.\n", ctx->instance_prefix, ctx->instance_num, ctx->pid, retry_attempt + 1, retry_sleep_secs); } sleep(retry_sleep_secs); retry_attempt += 1; retry_sleep_secs *= 2; } return sockfd; } int calc_paths_(struct Context_* ctx) { // Because the server needs to be in the same cwd as we are for things to // work, we only support a specific few locations to run from. Currently // this is project-root and src/assets if (path_exists_("config/projectconfig.json")) { // Looks like we're in project root. ctx->project_dir_path = "."; ctx->state_dir_path = ".cache/pcommandbatch"; ctx->instance_prefix = "root"; ctx->pcommandpath = "tools/pcommand"; } else if (path_exists_("ba_data") && path_exists_("../../config/projectconfig.json")) { // Looks like we're in src/assets. ctx->project_dir_path = "../.."; ctx->state_dir_path = "../../.cache/pcommandbatch"; ctx->instance_prefix = "assets"; ctx->pcommandpath = "../../tools/pcommand"; } if (ctx->state_dir_path == NULL) { char cwdbuf[MAXPATHLEN]; if (getcwd(cwdbuf, sizeof(cwdbuf)) < 0) { fprintf(stderr, "Error: pcommandbatch client %s (pid %d): unable to get cwd.\n", ctx->instance_prefix, ctx->pid); return -1; } fprintf(stderr, "Error: pcommandbatch client %s (pid %d): pcommandbatch from cwd " "'%s' " "is not supported.\n", ctx->instance_prefix, ctx->pid, cwdbuf); return -1; } assert(ctx->pcommandpath != NULL); assert(ctx->instance_prefix != NULL); // Spread requests for each location out randomly across a few instances. // This greatly increases scalability though is probably wasteful when // running just a few commands since we likely spin up a new server for // each. Maybe there's some way to smartly scale this. The best setup // might be to have a single 'controller' server instance that spins up // worker instances as needed. Though such a fancy setup might be // overkill. // ctx->instance_num = rand() % 6; // I was wondering if using pid would lead to a more even distribution, // but it didn't make a significant difference in my tests. And I worry // there would be some odd corner case where pid isn't going up evenly, so // sticking with rand() for now. ctx->instance_num = ctx->pid % 6; // Actually I think this might be a good technique. This should deliver a // few consecutive requests to a single server instance so it should // reduce wasted spinup time when just a command or two is run, but it // should still scale up to use all 6 instances when lots of commands go // through. (tests show this to be the same speed as the others in that // latter case). ctx->instance_num = (ctx->pid / 4) % 6; return 0; } int color_enabled() { // This logic here should line up with how the 'color_enabled' val in // efro.terminal is calculated. // Allow explict enabling/disabling via this env var. const char* env = getenv("EFRO_TERMCOLORS"); if (env && !strcmp(env, "1")) { return 1; } if (env && !strcmp(env, "0")) { return 0; } env = getenv("TERM"); // If TERM is unset, don't attempt color (this is currently the case // in xcode). if (!env) { return 0; } // A common way to say the terminal can't do fancy stuff like color. if (env && !(strcmp(env, "dumb"))) { return 0; } // If our stdout is not attached to a terminal, go with no-color. if (!isatty(1)) { return 0; } // We seem to be a terminal with color support; let's do it! return 1; } int send_command_(struct Context_* ctx, int argc, char** argv) { // Build a json array of our args. cJSON* req = cJSON_CreateObject(); cJSON* array = cJSON_CreateArray(); for (int i = 0; i < argc; ++i) { cJSON_AddItemToArray(array, cJSON_CreateString(argv[i])); } cJSON_AddItemToObject(req, "a", array); cJSON_AddItemToObject( req, "c", color_enabled() ? cJSON_CreateTrue() : cJSON_CreateFalse()); char* json_out = cJSON_Print(req); // Send our command. int msglen = strlen(json_out); if (write(ctx->sockfd, json_out, msglen) != msglen) { fprintf(stderr, "Error: pcommandbatch client %s_%d (pid %d): write failed.\n", ctx->instance_prefix, ctx->instance_num, ctx->pid); return -1; } // Issue a write shutdown so they get EOF on the other end. if (shutdown(ctx->sockfd, SHUT_WR) < 0) { fprintf(stderr, "Error: pcommandbatch client %s_%d (pid %d): write shutdown " "failed.\n", ctx->instance_prefix, ctx->instance_num, ctx->pid); return -1; } // Clean up our mess after we've sent them on their way. free(json_out); cJSON_Delete(req); return 0; } int handle_response_(struct Context_* ctx) { char* inbuf = read_string_from_socket_(ctx); // Getting null or an empty string response imply something is broken. if (!inbuf || inbuf[0] == 0) { fprintf(stderr, "Error: pcommandbatch client %s_%d (pid %d): failed to read result " "(errno %d).\n", ctx->instance_prefix, ctx->instance_num, ctx->pid, errno); if (inbuf) { free(inbuf); } return -1; } cJSON* result_dict = cJSON_Parse(inbuf); if (!result_dict) { fprintf( stderr, "Error: pcommandbatch client %s_%d (pid %d): failed to parse result " "value: %s\n", ctx->instance_prefix, ctx->instance_num, ctx->pid, inbuf); free(inbuf); return -1; } else { free(inbuf); } // If results included stdout output, print it. cJSON* result_output = cJSON_GetObjectItem(result_dict, "o"); if (!result_output || !cJSON_IsString(result_output)) { fprintf( stderr, "Error: pcommandbatch client %s_%d (pid %d): failed to parse result " "output value.\n", ctx->instance_prefix, ctx->instance_num, ctx->pid); cJSON_Delete(result_dict); return -1; } char* output_str = cJSON_GetStringValue(result_output); assert(output_str); if (output_str[0] != 0) { printf("%s", output_str); } // If results included stderr output, print it. result_output = cJSON_GetObjectItem(result_dict, "e"); if (!result_output || !cJSON_IsString(result_output)) { fprintf( stderr, "Error: pcommandbatch client %s_%d (pid %d): failed to parse result " "output value.\n", ctx->instance_prefix, ctx->instance_num, ctx->pid); cJSON_Delete(result_dict); return -1; } output_str = cJSON_GetStringValue(result_output); assert(output_str); if (output_str[0] != 0) { fprintf(stderr, "%s", output_str); } cJSON* result_code = cJSON_GetObjectItem(result_dict, "r"); if (!result_code || !cJSON_IsNumber(result_code)) { fprintf( stderr, "Error: pcommandbatch client %s_%d (pid %d): failed to parse result " "code value.\n", ctx->instance_prefix, ctx->instance_num, ctx->pid); cJSON_Delete(result_dict); return -1; } int result_val = cJSON_GetNumberValue(result_code); if (ctx->verbose) { fprintf(stderr, "pcommandbatch client %s_%d (pid %d) final result is %d.\n", ctx->instance_prefix, ctx->instance_num, ctx->pid, result_val); } cJSON_Delete(result_dict); return result_val; } char* read_string_from_socket_(const struct Context_* ctx) { const size_t initial_buffer_size = 1024 * 10; char* buffer = NULL; size_t buffer_size = 0; size_t data_received = 0; // Allocate initial memory for the buffer buffer = malloc(initial_buffer_size); if (!buffer) { perror("Failed to allocate memory for buffer"); return NULL; } buffer_size = initial_buffer_size; while (1) { // Read data from the socket. ssize_t bytes_read = recv(ctx->sockfd, buffer + data_received, buffer_size - data_received - 1, 0); if (bytes_read == -1) { perror("Error reading socket data"); free(buffer); return NULL; } else if (bytes_read == 0) { // Connection closed. break; } data_received += bytes_read; // Check if buffer is full (reserving space for term char); resize if // necessary. if (data_received + 1 >= buffer_size) { buffer_size *= 2; char* rbuffer = (char*)realloc(buffer, buffer_size); if (rbuffer) { buffer = rbuffer; } else { perror("Failed to resize buffer"); free(buffer); return NULL; } } } assert(data_received + 1 < buffer_size); if (ctx->verbose) { fprintf(stderr, "pcommandbatch client %s_%d (pid %d) read %zu byte response.\n", ctx->instance_prefix, ctx->instance_num, ctx->pid, data_received); } buffer[data_received] = 0; // Null terminator. return buffer; }