diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..059a607 --- /dev/null +++ b/.gitignore @@ -0,0 +1,40 @@ +# backups +*~ +\#*# +# thumbnails +.DS_Store +# objects +*.o +# libraries +*.so +*.a +# dependency files +*.d +# root dictionary pcm files +*.pcm +# root dictionary sourcefiles +*Dict.cxx +*Dict.h +# python pre-compiled modules +*.pyc +# data files +*.root +# core dumps +core +# ToolFramework log files +log.e +log.o +# ToolFramework UUIDs +UUID + +# the dependencies symlink +Dependencies +# executables +main +RemoteControl +NodeDaemon +# lib folder is just build products +lib/* +# the include folder is actually automatically populated +include/* + diff --git a/CMakeLists.txt b/CMakeLists.txt index f2fdc57..def0e62 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,17 +1,17 @@ +#TODO sym links not copy headers, use macro to search for .so files in UserTools and add the libraries to libs list and symlink to libs folder + cmake_minimum_required (VERSION 2.6) project (ToolDAQApplicaiton) -set(TOOLDAQ_PATH "${PROJECT_SOURCE_DIR}/ToolDAQ") - -#include(${TOOLDAQ_PATH}/ToolDAQFramework/CMakeLists.include) +set(DEPENDENCIES_PATH "${PROJECT_SOURCE_DIR}/Dependencies") -set(ZMQ_INC "${TOOLDAQ_PATH}/zeromq-4.0.7/include/") -set(ZMQ_LIB_PATH "${TOOLDAQ_PATH}/zeromq-4.0.7/lib") +set(ZMQ_INC "${DEPENDENCIES_PATH}/zeromq-4.0.7/include/") +set(ZMQ_LIB_PATH "${DEPENDENCIES_PATH}/zeromq-4.0.7/lib") set(ZMQ_LIBS zmq) -set(BOOST_INC "${TOOLDAQ_PATH}/boost_1_66_0/install/include/") -set(BOOST_LIB_PATH "${TOOLDAQ_PATH}/boost_1_66_0/install/lib") +set(BOOST_INC "${DEPENDENCIES_PATH}/boost_1_66_0/install/include/") +set(BOOST_LIB_PATH "${DEPENDENCIES_PATH}/boost_1_66_0/install/lib") +set(BOOST_LIBS boost_date_time boost_serialization boost_iostreams) set(DATAMODEL_INC "") @@ -22,21 +22,19 @@ set(MYTOOLS_INC "") set(MYTOOLS_LIB_PATH "") set(MYTOOLS_LIBS "") -#add_subdirectory(${TOOLDAQ_PATH}/ToolDAQFramework/ ./ToolDAQ/ToolDAQFramework/) +set(TOOLFRAMEWORK_INC 
"${DEPENDENCIES_PATH}/ToolFrameworkCore/include") +set(TOOLFRAMEWORK_LIBS_PATH "${DEPENDENCIES_PATH}/ToolFrameworkCore/lib") +set(TOOLFRAMEWORK_LIBS DataModelBase Logging Store ToolChain) -if(NOT(${PROJECT_SOURCE_DIR} STREQUAL ${PROJECT_BINARY_DIR})) -message("Not Building in source directory: Copying files") -FILE(COPY ${PROJECT_SOURCE_DIR}/configfiles DESTINATION ${PROJECT_BINARY_DIR}/) -FILE(COPY ${PROJECT_SOURCE_DIR}/UserTools DESTINATION ${PROJECT_BINARY_DIR}/) -FILE(COPY ${PROJECT_SOURCE_DIR}/DataModel DESTINATION ${PROJECT_BINARY_DIR}/) -FILE(COPY ${PROJECT_SOURCE_DIR}/Setup.sh DESTINATION ${PROJECT_BINARY_DIR}/) -endif() +set(TOOLDAQ_INC "${DEPENDENCIES_PATH}/ToolDAQFramework/include") +set(TOOLDAQ_LIBS_PATH "${DEPENDENCIES_PATH}/ToolDAQFramework/lib") +set(TOOLDAQ_LIBS DAQDataModelBase DAQLogging DAQStore ServiceDiscovery ToolDAQChain) -include_directories(${PROJECT_BINARY_DIR}/DataModel ${BOOST_INC} ${ZMQ_INC} ${DATAMODEL_INC} ${MYTOOLS_INC} ${TOOLDAQ_PATH}/ToolDAQFramework/include ${TOOLDAQ_PATH}/ToolDAQFramework/src/Tool ${TOOLDAQ_PATH}/ToolDAQFramework/src/ToolChain ${TOOLDAQ_PATH}/ToolDAQFramework/src/Logging ${TOOLDAQ_PATH}/ToolDAQFramework/src/Store ${TOOLDAQ_PATH}/ToolDAQFramework/src/ServiceDiscovery/) -link_directories("${PROJECT_BINARY_DIR}/lib" ${BOOST_LIB_PATH} ${ZMQ_LIB_PATH} ${DATAMODEL_LIB_PATH} ${MYTOOLS_LIB_PATH} ${TOOLDAQ_PATH}/ToolDAQFramework/lib) +include_directories (${DATAMODEL_INC} ${MYTOOLS_INC} ${TOOLFRAMEWORK_INC} ${TOOLDAQ_INC} ${ZMQ_INC} ${BOOST_INC}) +link_directories(${DATAMODEL_LIB_PATH} ${MYTOOLS_LIB_PATH} ${TOOLFRAMEWORK_LIBS_PATH} ${TOOLDAQ_LIBS_PATH} ${ZMQ_LIB_PATH} ${BOOST_LIB_PATH}) MACRO(HEADER_DIRECTORIES return_list) - FILE(GLOB_RECURSE new_list ${PROJECT_BINARY_DIR}/UserTools/*.h) + FILE(GLOB_RECURSE new_list ${PROJECT_SOURCE_DIR}/src/*.h ${PROJECT_SOURCE_DIR}/DataModel/*.h ${PROJECT_SOURCE_DIR}/UserTools/*.h ) FILE(COPY ${new_list} DESTINATION ${PROJECT_BINARY_DIR}/include) SET(dir_list "") FOREACH(file_path 
${new_list}) @@ -47,37 +45,18 @@ MACRO(HEADER_DIRECTORIES return_list) SET(${return_list} ${dir_list}) ENDMACRO() +FILE(COPY ${PROJECT_SOURCE_DIR}/configfiles DESTINATION ${PROJECT_BINARY_DIR}/) + HEADER_DIRECTORIES(header_list) include_directories(${header_list}) set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib) -file(GLOB_RECURSE STORE_SRC RELATIVE ${CMAKE_SOURCE_DIR} "${TOOLDAQ_PATH}/ToolDAQFramework/src/Store/*.cpp") -add_library(Store SHARED ${STORE_SRC}) - -file(GLOB_RECURSE LOGGING_SRC RELATIVE ${CMAKE_SOURCE_DIR} "${TOOLDAQ_PATH}/ToolDAQFramework/src/Logging/*.cpp") -add_library(Logging SHARED ${LOGGING_SRC}) - -file(GLOB_RECURSE SERVICEDISCOVERY_SRC RELATIVE ${CMAKE_SOURCE_DIR} "${TOOLDAQ_PATH}/ToolDAQFramework/src/ServiceDiscovery/*.cpp") -add_library(ServiceDiscovery SHARED ${SERVICEDISCOVERY_SRC}) - -file(GLOB_RECURSE DATAMODEL_SRC RELATIVE ${CMAKE_BINARY_DIR} "DataModel/*.cpp") +file(GLOB_RECURSE DATAMODEL_SRC RELATIVE ${CMAKE_SOURCE_DIR} "DataModel/*.cpp") add_library(DataModel SHARED ${DATAMODEL_SRC}) -file(GLOB_RECURSE MYTOOLS_SRC RELATIVE ${CMAKE_BINARY_DIR} "UserTools/*.cpp") +file(GLOB_RECURSE MYTOOLS_SRC RELATIVE ${CMAKE_SOURCE_DIR} "UserTools/*.cpp") add_library(MyTools SHARED ${MYTOOLS_SRC}) -include_directories(${TOOLDAQ_PATH}/ToolDAQFramework/src/Logging) - -file(GLOB_RECURSE TOOLCHAIN_SRC RELATIVE ${CMAKE_SOURCE_DIR} "${TOOLDAQ_PATH}/ToolDAQFramework/src/ToolChain/*.cpp") -add_library(ToolChain SHARED ${TOOLCHAIN_SRC}) - - add_executable (main ${PROJECT_SOURCE_DIR}/src/main.cpp) -target_link_libraries (main Store Logging ToolChain ServiceDiscovery MyTools DataModel ${ZMQ_LIBS} ${BOOST_LIBS} ${DATAMODEL_LIBS} ${MYTOOLS_LIBS}) - -add_executable ( NodeDaemon ${TOOLDAQ_PATH}/ToolDAQFramework/src/NodeDaemon/NodeDaemon.cpp) -target_link_libraries (NodeDaemon Store ServiceDiscovery ${ZMQ_LIBS} ${BOOST_LIBS}) - -add_executable ( RemoteControl ${TOOLDAQ_PATH}/ToolDAQFramework/src/RemoteControl/RemoteControl.cpp) -target_link_libraries 
(RemoteControl Store ServiceDiscovery ${ZMQ_LIBS} ${BOOST_LIBS}) +target_link_libraries (main MyTools DataModel pthread ${DATAMODEL_LIBS} ${MYTOOLS_LIBS} ${TOOLFRAMEWORK_LIBS} ${TOOLDAQ_LIBS} ${ZMQ_LIBS} ${BOOST_LIBS}) diff --git a/DataModel/DataModel.cpp b/DataModel/DataModel.cpp index 98b11f4..e1267cf 100644 --- a/DataModel/DataModel.cpp +++ b/DataModel/DataModel.cpp @@ -1,6 +1,6 @@ #include "DataModel.h" -DataModel::DataModel(){} +DataModel::DataModel():DAQDataModelBase(){} /* TTree* DataModel::GetTTree(std::string name){ @@ -17,7 +17,7 @@ void DataModel::AddTTree(std::string name,TTree *tree){ } -void DataModel::DeleteTTree(std::string name){ +void DataModel::DeleteTTree(std::string name,TTree *tree){ m_trees.erase(name); diff --git a/DataModel/DataModel.h b/DataModel/DataModel.h index d46a6d4..017f478 100644 --- a/DataModel/DataModel.h +++ b/DataModel/DataModel.h @@ -1,59 +1,139 @@ #ifndef DATAMODEL_H #define DATAMODEL_H -#include -#include #include - -//#include "TTree.h" - -#include "Store.h" -#include "BoostStore.h" -#include "Logging.h" -#include "Utilities.h" - -#include +#include +#include + +#include "DAQDataModelBase.h" +#include "Pool.h" +#include "JobQueue.h" +#include "QueryBatch.h" +#include "ManagedSocket.h" +#include "query_topics.h" +#include "type_name_as_string.h" // mostly for debug +class MonitoringVariables; /** * \class DataModel - * - * This class Is a transient data model class for your Tools within the ToolChain. If Tools need to comunicate they pass all data objects through the data model. There fore inter tool data objects should be deffined in this class. +* +* This class is a transient data model class for your Tools within the ToolChain. If Tools need to communicate they pass all data through the data model. Therefore inter-tool data variables should be defined in this class. 
* * * $Author: B.Richards $ - * $Date: 2019/05/26 18:34:00 $ - * Contact: b.richards@qmul.ac.uk - * - */ - -class DataModel { - - - public: - - DataModel(); ///< Simple constructor - - //TTree* GetTTree(std::string name); - //void AddTTree(std::string name,TTree *tree); - //void DeleteTTree(std::string name); - - Store vars; ///< This Store can be used for any variables. It is an inefficent ascii based storage - BoostStore CStore; ///< This is a more efficent binary BoostStore that can be used to store a dynamic set of inter Tool variables. - std::map Stores; ///< This is a map of named BooStore pointers which can be deffined to hold a nammed collection of any tipe of BoostStore. It is usefull to store data that needs subdividing into differnt stores. - - Logging *Log; ///< Log class pointer for use in Tools, it can be used to send messages which can have multiple error levels and destination end points - - zmq::context_t* context; ///< ZMQ contex used for producing zmq sockets for inter thread, process, or computer communication - - - private: - - - - //std::map m_trees; - - - + * $Date: 2019/05/26 $ + * Contact: benjamin.richards@warwick.ac.uk + * +*/ + +using namespace ToolFramework; + +class DataModel : public DAQDataModelBase { + + public: + DataModel(); ///< Simple constructor + + Utilities utils; ///< for thread management + + bool change_config; ///< signaller for Tools to reload their configuration variables + + // Tools can add connections to this and the SocketManager + // will periodically invoke UpdateConnections to connect clients + std::map managed_sockets; + std::mutex managed_sockets_mtx; + + Pool job_pool; ///< pool of job structures to encapsulate jobs + JobQueue job_queue; ///< job queue to submit jobs to job manager + uint32_t thread_cap; ///< total number of thread cap to use in the program + std::atomic num_threads; ///< current number of threads + unsigned int worker_threads; + unsigned int max_worker_threads; + + std::map 
monitoring_variables; + std::mutex monitoring_variables_mtx; + + /* ----------------------------------------- */ + /* MulticastReceiveSender */ + /* ----------------------------------------- */ + + // pool of string buffers: + // the receiver thread grabs a vector from the pool, fills it, + // the pushes the filled vector into the in_multicast_msg_queue + // and grabs a new vector from the pool + // FIXME base pool size on available RAM and struct size / make configurable + // Pool::Pool(bool in_manage=false, uint16_t period_ms=1000, size_t in_object_cap=1) + Pool> multicast_buffer_pool{true, 5000, 100}; + + // batches of received messages, both logging and monitoring + // FIXME make these pairs or structs, container+mtx + // FIXME if instead of just a vector we used MulticastBatch, we could accumulate the length + // and then reserve in advance the length of the string needed for the combined message....? + // XXX actually only if we tracked by topic, as one vector gets turned into 5 topical concat'd strings... + std::vector*> in_multicast_msg_queue; + std::mutex in_multicast_msg_queue_mtx; + + // outgoing logging messages + std::vector out_log_msg_queue; + std::mutex out_log_msg_queue_mtx; + + // outgoing monitoring messages + std::vector out_mon_msg_queue; + std::mutex out_mon_msg_queue_mtx; + + // pool is shared between read and write query receivers + Pool querybatch_pool{true, 5000, 100}; + + /* ----------------------------------------- */ + /* PubReceiver */ + /* ----------------------------------------- */ + std::vector write_msg_queue; + std::mutex write_msg_queue_mtx; + + /* ----------------------------------------- */ + /* ReadReply */ + /* ----------------------------------------- */ + // TODO Tool monitoring struct? 
+ std::vector read_msg_queue; + std::mutex read_msg_queue_mtx; + std::deque query_replies; + std::mutex query_replies_mtx; + + /* ----------------------------------------- */ + /* MulticastWorkers */ + /* ----------------------------------------- */ + // each element is a batch of JSON that can be inserted by the DatabaseWorkers + // FIXME these strings represent batches of multicast messages, so could be very large. + // each push_back could require reallocation, which could involve moving a lot of very large message buffers + // FIXME make these pointers, put the strings (maybe make a struct? maybe just a typedef/alias?) in a pool? + Pool multicast_batch_pool{true, 5000, 100}; + + std::vector log_query_queue; + std::mutex log_query_queue_mtx; + + std::vector mon_query_queue; + std::mutex mon_query_queue_mtx; + + std::vector rootplot_query_queue; + std::mutex rootplot_query_queue_mtx; + + std::vector plotlyplot_query_queue; + std::mutex plotlyplot_query_queue_mtx; + + /* ----------------------------------------- */ + /* WriteWorkers */ + /* ----------------------------------------- */ + std::vector write_query_queue; + std::mutex write_query_queue_mtx; + + /* ----------------------------------------- */ + /* DatabaseWorkers */ + /* ----------------------------------------- */ + + std::vector query_results; // output, awaiting for result conversion + std::mutex query_results_mtx; + + private: + }; diff --git a/DataModel/ManagedSocket.h b/DataModel/ManagedSocket.h new file mode 100644 index 0000000..807ddb5 --- /dev/null +++ b/DataModel/ManagedSocket.h @@ -0,0 +1,18 @@ +#ifndef ManagedSocket_H +#define ManagedSocket_H + +#include +#include +#include + +struct ManagedSocket { + std::mutex socket_mtx; + bool socket_manager_request=false; + zmq::socket_t* socket=nullptr; + std::string service_name; +/* std::string remote_port;*/ + std::string remote_port_name; + std::map connections; +}; + +#endif diff --git a/DataModel/MonitoringVariables.h 
b/DataModel/MonitoringVariables.h new file mode 100644 index 0000000..6c35ef0 --- /dev/null +++ b/DataModel/MonitoringVariables.h @@ -0,0 +1,42 @@ +#ifndef MonitoringVariables_H +#define MonitoringVariables_H +#include +#include + +class MonitoringVariables { + public: + MonitoringVariables(){}; + virtual ~MonitoringVariables(){}; + virtual std::string toJSON(){ return ""; }; + ToolFramework::Store vars; + std::mutex mtx; + void Clear(){ + std::unique_lock locker(mtx); + vars.Delete(); + return; + } + + template + void Set(const std::string& key, T val){ + std::unique_lock locker(mtx); + vars.Set(key, val); + return; + } + + std::string GetJSON(){ + std::unique_lock locker(mtx); + std::string ret; + vars >> ret; + std::string ret2 = toJSON(); + if(ret.length()==2) return ret2; // if nothing in Store, return result from toJSON + if(!ret2.empty()){ + ret.pop_back(); // remove trailing '}' + ret2[0]=','; // replace leading '{' with ',' to concatenate the two + ret += ret2; + } + return ret; + } + +}; + +#endif diff --git a/DataModel/QueryBatch.h b/DataModel/QueryBatch.h new file mode 100644 index 0000000..4156223 --- /dev/null +++ b/DataModel/QueryBatch.h @@ -0,0 +1,99 @@ +#ifndef QUERY_BATCH_H +#define QUERY_BATCH_H + +#include +#include + +#include "ZmqQuery.h" + +struct QueryBatch { + + QueryBatch(size_t prealloc_size){ + queries.reserve(prealloc_size); + } + + // fill / read by receive/senders + std::vector queries; + + // prepare for batch insertion by workers + std::string alarm_buffer; + std::string devconfig_buffer; + std::string runconfig_buffer; + std::string calibration_buffer; + std::string plotlyplot_buffer; + std::string rootplot_buffer; + + // flagged for can't be batch inserted by workers + std::vector generic_query_indices; + + // set by database workers after batch insert + std::vector devconfig_version_nums; + std::vector runconfig_version_nums; + std::vector calibration_version_nums; + std::vector plotlyplot_version_nums; + std::vector 
rootplot_version_nums; + + std::string alarm_batch_err; + std::string devconfig_batch_err; + std::string runconfig_batch_err; + std::string calibration_batch_err; + std::string plotlyplot_batch_err; + std::string rootplot_batch_err; + + // for debug + void push_time(std::string_view s){ for(auto&& query : queries) query.push_time(s); } + + void reset(){ + alarm_buffer = "["; + devconfig_buffer = "["; + runconfig_buffer = "["; + calibration_buffer = "["; + plotlyplot_buffer = "["; + rootplot_buffer = "["; + + devconfig_version_nums.clear(); + runconfig_version_nums.clear(); + calibration_version_nums.clear(); + plotlyplot_version_nums.clear(); + rootplot_version_nums.clear(); + generic_query_indices.clear(); + + alarm_batch_err.clear(); + devconfig_batch_err.clear(); + runconfig_batch_err.clear(); + calibration_batch_err.clear(); + plotlyplot_batch_err.clear(); + rootplot_batch_err.clear(); + } + + void close(){ + if(alarm_buffer.length()!=1) alarm_buffer += "]"; + else alarm_buffer.clear(); + + if(devconfig_buffer.length()!=1) devconfig_buffer += "]"; + else devconfig_buffer.clear(); + + if(runconfig_buffer.length()!=1) runconfig_buffer += "]"; + else runconfig_buffer.clear(); + + if(calibration_buffer.length()!=1) calibration_buffer += "]"; + else calibration_buffer.clear(); + + if(plotlyplot_buffer.length()!=1) plotlyplot_buffer += "]"; + else plotlyplot_buffer.clear(); + + if(rootplot_buffer.length()!=1) rootplot_buffer += "]"; + else rootplot_buffer.clear(); + } + + bool got_alarms() const { return !alarm_buffer.empty(); } + bool got_devconfigs() const { return !devconfig_buffer.empty(); } + bool got_runconfigs() const { return !runconfig_buffer.empty(); } + bool got_calibrations() const { return !calibration_buffer.empty(); } + bool got_plotlyplots() const { return !plotlyplot_buffer.empty(); } + bool got_rootplots() const { return !rootplot_buffer.empty(); } + bool got_generics() const { return !generic_query_indices.empty(); } + +}; + +#endif diff --git 
a/DataModel/Utilities.cpp b/DataModel/Utilities.cpp deleted file mode 100644 index 282578b..0000000 --- a/DataModel/Utilities.cpp +++ /dev/null @@ -1,260 +0,0 @@ -#include - -Utilities::Utilities(zmq::context_t* zmqcontext){ - context=zmqcontext; - Threads.clear(); -} - -bool Utilities::AddService(std::string ServiceName, unsigned int port, bool StatusQuery){ - - zmq::socket_t Ireceive (*context, ZMQ_PUSH); - Ireceive.connect("inproc://ServicePublish"); - - boost::uuids::uuid m_UUID; - m_UUID = boost::uuids::random_generator()(); - - std::stringstream test; - test<<"Add "<< ServiceName <<" "< &connections, std::string port){ - - boost::uuids::uuid m_UUID=boost::uuids::random_generator()(); - long msg_id=0; - - zmq::socket_t Ireceive (*context, ZMQ_DEALER); - Ireceive.connect("inproc://ServiceDiscovery"); - - - zmq::message_t send(4); - snprintf ((char *) send.data(), 4 , "%s" ,"All") ; - - - Ireceive.send(send); - - zmq::message_t receive; - Ireceive.recv(&receive); - std::istringstream iss(static_cast(receive.data())); - - int size; - iss>>size; - - for(int i=0;i(servicem.data())); - service->JsonParser(ss.str()); - - std::string type; - std::string uuid; - std::string ip; - std::string remote_port; - service->Get("msg_value",type); - service->Get("uuid",uuid); - service->Get("ip",ip); - if(port=="") service->Get("remote_port",remote_port); - else remote_port=port; - std::string tmp=ip + ":" + remote_port; - - //if(type == ServiceName && connections.count(uuid)==0){ - if(type == ServiceName && connections.count(tmp)==0){ - connections[tmp]=service; - //std::string ip; - //std::string port; - //service->Get("ip",ip); - //service->Get("remote_port",port); - tmp="tcp://"+ tmp; - sock->connect(tmp.c_str()); - } - else{ - delete service; - service=0; - } - - - } - - return connections.size(); - } - -Thread_args* Utilities::CreateThread(std::string ThreadName, void (*func)(Thread_args*), Thread_args* args){ - - if(Threads.count(ThreadName)==0){ - - if(args==0) args = 
new Thread_args(); - - args->context=context; - args->ThreadName=ThreadName; - args->func=func; - args->running=true; - - pthread_create(&(args->thread), NULL, Utilities::Thread, args); - - args->sock=0; - Threads[ThreadName]=args; - -} - - else args=0; - - return args; - -} - - -Thread_args* Utilities::CreateThread(std::string ThreadName, void (*func)(std::string)){ - Thread_args *args =0; - - if(Threads.count(ThreadName)==0){ - - args = new Thread_args(context, ThreadName, func); - pthread_create(&(args->thread), NULL, Utilities::String_Thread, args); - args->sock=0; - args->running=true; - Threads[ThreadName]=args; - } - - return args; -} - -void *Utilities::String_Thread(void *arg){ - - - Thread_args *args = static_cast(arg); - - zmq::socket_t IThread(*(args->context), ZMQ_PAIR); - /// need to subscribe - std::stringstream tmp; - tmp<<"inproc://"<ThreadName; - IThread.bind(tmp.str().c_str()); - - - zmq::pollitem_t initems[] = { - {IThread, 0, ZMQ_POLLIN, 0}}; - - args->running = true; - - while(!args->kill){ - if(args->running){ - - std::string command=""; - - zmq::poll(&initems[0], 1, 0); - - if ((initems[0].revents & ZMQ_POLLIN)){ - - zmq::message_t message; - IThread.recv(&message); - command=std::string(static_cast(message.data())); - - } - - args->func_with_string(command); - } - - else usleep(100); - - } - - pthread_exit(NULL); - } - -void *Utilities::Thread(void *arg){ - - Thread_args *args = static_cast(arg); - - while (!args->kill){ - - if(args->running) args->func(args ); - else usleep(100); - - } - - pthread_exit(NULL); - -} - -bool Utilities::MessageThread(Thread_args* args, std::string Message, bool block){ - - bool ret=false; - - if(args){ - - if(!args->sock){ - - args->sock = new zmq::socket_t(*(args->context), ZMQ_PAIR); - std::stringstream tmp; - tmp<<"inproc://"<ThreadName; - args->sock->connect(tmp.str().c_str()); - - } - - zmq::message_t msg(Message.length()+1); - snprintf((char *)msg.data(), Message.length()+1, "%s", Message.c_str()); - - 
if(block) ret=args->sock->send(msg); - else ret=args->sock->send(msg, ZMQ_NOBLOCK); - - } - - return ret; - -} - -bool Utilities::MessageThread(std::string ThreadName, std::string Message, bool block){ - - return MessageThread(Threads[ThreadName],Message,block); -} - -bool Utilities::KillThread(Thread_args* &args){ - - bool ret=false; - - if(args){ - - args->running=false; - args->kill=true; - - pthread_join(args->thread, NULL); - //delete args; - //args=0; - - - } - - return ret; - -} - -bool Utilities::KillThread(std::string ThreadName){ - - return KillThread(Threads[ThreadName]); - -} - diff --git a/DataModel/Utilities.h b/DataModel/Utilities.h deleted file mode 100644 index 8aac354..0000000 --- a/DataModel/Utilities.h +++ /dev/null @@ -1,152 +0,0 @@ -#ifndef UTILITIES_H -#define UTILITIES_H - -#include -#include -#include -#include -#include -#include -#include -#include // generators -#include // streaming operators etc. - -/** - * \struct DataModelThread_args - * - * This is both an base class for any thread argument struct used in the tool threaded Tool templates. -Effectivly this acts as a place to put variable that are specfic to that thread and can be used as a place to transfer variables from the main thread to sub threads. 
- * - * - * $Author: B.Richards $ - * $Date: 2019/05/26 18:34:00 $ - * Contact: b.richards@qmul.ac.uk - * - */ - -struct Thread_args{ - - Thread_args(){ ///< Simple constructor - kill=false; - } - - Thread_args(zmq::context_t* contextin, std::string threadname, void (*funcin)(std::string)){ ///< Construtor for thread with string - - context=contextin; - ThreadName=threadname; - func_with_string=funcin; - kill=false; - } - - Thread_args(zmq::context_t* contextin, std::string threadname, void (*funcin)(Thread_args*)){ ///< Constrcutor for thread with args - - context=contextin; - ThreadName=threadname; - func=funcin; - kill=false; - } - - virtual ~Thread_args(){ ///< virtual constructor - running =false; - kill=true; - delete sock; - sock=0; - } - - zmq::context_t *context; ///< ZMQ context used for ZMQ socket creation - std::string ThreadName; ///< name of thread (deffined at creation) - void (*func_with_string)(std::string); ///< function pointer to string thread - void (*func)(Thread_args*); ///< function pointer to thread with args - pthread_t thread; ///< Simple constructor underlying thread that interface is built ontop of - zmq::socket_t* sock; ///< ZMQ socket pointer is assigned in string thread,but can be sued otherwise - bool running; ///< Bool flag to tell the thread to run (if not set thread goes into wait cycle - bool kill; ///< Bool flay used to kill the thread - -}; - - -/** - * \class Utilities - * - * This class can be instansiated in a Tool and provides some helpful threading, dynamic socket descovery and promotion functionality - * - * - * $Author: B.Richards $ - * $Date: 2019/05/26 18:34:00 $ - * Contact: b.richards@qmul.ac.uk - * - */ - -class Utilities{ - - public: - - Utilities(zmq::context_t* zmqcontext); ///< Simple constructor - bool AddService(std::string ServiceName, unsigned int port, bool StatusQuery=false); ///< Broadcasts an available service (only in remote mode) - bool RemoveService(std::string ServiceName); ///< Removes service 
broadcasts for a service - int UpdateConnections(std::string ServiceName, zmq::socket_t* sock, std::map &connections, std::string port=""); ///< Dynamically connects a socket tp services broadcast with a specific name - Thread_args* CreateThread(std::string ThreadName, void (*func)(std::string)); //func = &my_int_func; ///< Create a simple thread that has string exchange with main thread - Thread_args* CreateThread(std::string ThreadName, void (*func)(Thread_args*), Thread_args* args); ///< Create a thread with more complicated data exchange definned by arguments - bool MessageThread(Thread_args* args, std::string Message, bool block=true); ///< Send simple string to String thread - bool MessageThread(std::string ThreadName, std::string Message, bool block=true); ///< Send simple string to String thread - bool KillThread(Thread_args* &args); ///< Kill a thread assosiated to args - bool KillThread(std::string ThreadName); ///< Kill a thread by name - - template bool KillThread(T* pointer){ - - Thread_args* tmp=pointer; - return KillThread(tmp); - - } ///< Kill a thread with args that inheirt form base Thread_args - - template bool SendPointer(zmq::socket_t* sock, T* pointer){ - - std::stringstream tmp; - tmp<send(message); - - } ///< Send a pointer over a ZMQ socket - - template bool ReceivePointer(zmq::socket_t* sock, T*& pointer){ - - zmq::message_t message; - - if(sock->recv(&message)){ - - std::istringstream iss(static_cast(message.data())); - - // long long unsigned int tmpP; - unsigned long tmpP; - iss>>std::hex>>tmpP; - - pointer=reinterpret_cast(tmpP); - - return true; - } - - else { - pointer=0; - return false; - } - - } ///< Receive a pointer over a ZMQ socket - - - - - private: - - zmq::context_t *context; ///< ZMQ context pointer - static void* String_Thread(void *arg); ///< Simpe string thread - static void* Thread(void *arg); ///< Thread with args - std::map Threads; ///< Map of threads managed by the utilities class. 
- - -}; - - -#endif diff --git a/DataModel/ZmqQuery.h b/DataModel/ZmqQuery.h new file mode 100644 index 0000000..7c1f598 --- /dev/null +++ b/DataModel/ZmqQuery.h @@ -0,0 +1,115 @@ +#ifndef ZMQ_QUERY_H +#define ZMQ_QUERY_H + +#include // for debug +#include +#include + +struct ZmqQuery { + + ZmqQuery(){}; + ~ZmqQuery(){}; + + // no copy constructor + ZmqQuery(const ZmqQuery&) = delete; + // no copy assignment operator + ZmqQuery& operator=(const ZmqQuery&) = delete; + + // allow move constructor + ZmqQuery(ZmqQuery&& c) = default; + // allow move assignment operator + ZmqQuery& operator=(ZmqQuery&& c) = default; + + // 4 parts for receiving, for sending 3+ parts + std::vector parts{4}; + size_t size() const { + return parts.size(); + } + + // pub socket: topic, client, msgnum, query + // router socket: client, topic, msgnum, query + // replies: client, msgnum, success, results (if present)... + // if success is false, results are 1-part with an error message + + zmq::message_t& operator[](int i){ + return parts[i]; + } + // received and returned + std::string_view client_id(){ + return std::string_view{(const char*)parts[0].data(),parts[0].size()}; + } + uint32_t msg_id(){ + return *reinterpret_cast(parts[1].data()); + } + // received only + std::string_view topic(){ + return std::string_view{(const char*)parts[2].data(),parts[2].size()}; + } + std::string_view msg(){ + return std::string_view{(const char*)parts[3].data(),parts[3].size()}; + } + + // for setting success + void setsuccess(uint32_t succeeded){ + //zmq_msg_init_size(&parts[2],sizeof(uint32_t)); // this is from underlying c api... mismatch zmq_msg_t* / zmq::message_t + new(&parts[2]) zmq::message_t(sizeof(uint32_t)); // FIXME is there a better way to call zmq_msg_init_size? + memcpy((void*)parts[2].data(),&succeeded,sizeof(uint32_t)); // FIXME make bool instead of uint32_t? 
+ return; + } + + // for read queries, returned directly from pqxx, decoded later + pqxx::result result; + std::string err; + + void Clear(){ + result.clear(); + err.clear(); + } + + // for setting responses of read queries + void setresponserows(size_t n_rows){ + //printf("ZmqQuery at %p set to %lu response rows\n",this, n_rows); + parts.resize(3+n_rows); + return; + } + + void setresponse(size_t row_num, std::string_view val){ + //zmq_msg_init_size(&parts[row_num+3],row.size()); // mismatch zmq_msg_t* / zmq::message_t + //printf("response part %lu set to %s on ZmqQuery at %p\n", row_num, val.data(), this); + new(&parts[row_num+3]) zmq::message_t(val.size()); // FIXME better way to call zmq_msg_init_size + memcpy((void*)parts[row_num+3].data(),val.data(),val.size()); + return; + } + + template + typename std::enable_if::value, void>::type + setresponse(size_t row_num, T val){ + //zmq_msg_init_size(&parts[row_num+3],row.size()); // mismatch zmq_msg_t* / zmq::message_t + + // what a mess. 
but only printf bypasses our great overlord's wonderful logging decorations + //std::ostringstream oss; + //oss << val; + //printf("response part %lu set to %s on ZmqQuery at %p\n", row_num, oss.str().c_str(), this); + + new(&parts[row_num+3]) zmq::message_t(sizeof(val)); // FIXME better way to call zmq_msg_init_size + memcpy((void*)parts[row_num+3].data(),&val,sizeof(val)); + return; + } + + // FOR DEBUG + // --------- + std::vector>> times; + void push_time(std::string_view s){ times.emplace_back(s, std::chrono::system_clock::now()); } + void print_times(){ + push_time("reply_send"); + for(size_t i=1; i %s: %u ",times[i-1].first.c_str(), times[i].first.c_str(), std::chrono::duration_cast(times[i].second-times[i-1].second).count()); + } + printf("\n"); + } + // --------- + +}; + + +#endif diff --git a/DataModel/query_topics.h b/DataModel/query_topics.h new file mode 100644 index 0000000..8b94ffb --- /dev/null +++ b/DataModel/query_topics.h @@ -0,0 +1,9 @@ +#ifndef QUERY_TYPES_H +#define QUERY_TYPES_H + +// used by MulticastWorkers and DatabaseWorkers +// only write query topics +enum class query_topic : char { alarm='A', dev_config='D', run_config='R', calibration='C', logging='L', monitoring='M', rootplot='T', plotlyplot='P', generic='Q' }; + +#endif + diff --git a/DataModel/type_name_as_string.cpp b/DataModel/type_name_as_string.cpp new file mode 100644 index 0000000..25ee8af --- /dev/null +++ b/DataModel/type_name_as_string.cpp @@ -0,0 +1,16 @@ +#include "type_name_as_string.h" + +std::string current_exception_name(){ + std::unique_ptr own + ( +#ifndef _MSC_VER + abi::__cxa_demangle(abi::__cxa_current_exception_type()->name(), nullptr, + nullptr, nullptr), +#else + nullptr, +#endif + std::free + ); + std::string r = own != nullptr ? 
own.get() : abi::__cxa_current_exception_type()->name(); + return r; +} diff --git a/DataModel/type_name_as_string.h b/DataModel/type_name_as_string.h new file mode 100644 index 0000000..0672f83 --- /dev/null +++ b/DataModel/type_name_as_string.h @@ -0,0 +1,47 @@ +#ifndef TypeNameAsString_h +#define TypeNameAsString_h +/* usage: +auto returnedcovmatrix = htrackfitresult->GetCovarianceMatrix(); +std::cout << type_name() << endl; +*/ + +#include +#include +#ifndef _MSC_VER +# include +#endif +#include +#include +#include + +template +std::string +type_name() +{ + typedef typename std::remove_reference::type TR; + std::unique_ptr own + ( +#ifndef _MSC_VER + abi::__cxa_demangle(typeid(TR).name(), nullptr, + nullptr, nullptr), +#else + nullptr, +#endif + std::free + ); + std::string r = own != nullptr ? own.get() : typeid(TR).name(); + if (std::is_const::value) + r += " const"; + if (std::is_volatile::value) + r += " volatile"; + if (std::is_lvalue_reference::value) + r += "&"; + else if (std::is_rvalue_reference::value) + r += "&&"; + return r; +} + +std::string current_exception_name(); + +#endif // define TypeNameAsString_h + diff --git a/Makefile b/Makefile index f65bb15..bc379ac 100644 --- a/Makefile +++ b/Makefile @@ -1,126 +1,105 @@ -ToolDAQPath=ToolDAQ +Dependencies=Dependencies +ToolFrameworkCore=$(Dependencies)/ToolFrameworkCore +ToolDAQFramework=$(Dependencies)/ToolDAQFramework +SOURCEDIR=`pwd` + +CXXFLAGS= -fmax-errors=3 -fPIC -std=c++20 -Wno-comment -Werror=array-bounds -Werror=return-type # -Wpedantic -Wall -Wno-unused -Wextra -Wcast-align -Wcast-qual -Wctor-dtor-privacy -Wdisabled-optimization -Wformat=2 -Winit-self -Wlogical-op -Wmissing-declarations -Wmissing-include-dirs -Wnoexcept -Woverloaded-virtual -Wredundant-decls -Wshadow -Wsign-conversion -Wsign-promo -Wstrict-null-sentinel -Wstrict-overflow=5 -Wswitch-default -Wundef #-Werror -Wold-style-cast -CXXFLAGS= -fPIC -Wpedantic -O3 # -g -lSegFault -rdynamic -DDEBUG -# -Wl,--no-as-needed ifeq 
($(MAKECMDGOALS),debug) -CXXFLAGS+= -O1 -g -lSegFault -rdynamic -DDEBUG +CXXFLAGS+= -O0 -g -lSegFault -rdynamic -DDEBUG +else +CXXFLAGS+= -O3 endif -ZMQLib= -L $(ToolDAQPath)/zeromq-4.0.7/lib -lzmq -ZMQInclude= -I $(ToolDAQPath)/zeromq-4.0.7/include/ +DataModelInclude = +DataModelLib = -BoostLib= -L $(ToolDAQPath)/boost_1_66_0/install/lib -lboost_date_time -lboost_serialization -lboost_iostreams -BoostInclude= -I $(ToolDAQPath)/boost_1_66_0/install/include +MyToolsInclude = +MyToolsLib = -DataModelInclude = -DataModelLib = +ZMQLib= -L $(Dependencies)/zeromq-4.0.7/lib -lzmq +ZMQInclude= -I $(Dependencies)/zeromq-4.0.7/include/ + +BoostLib= -L $(Dependencies)/boost_1_66_0/install/lib -lboost_date_time -lboost_serialization -lboost_iostreams +BoostInclude= -I $(Dependencies)/boost_1_66_0/install/include + +#PostgresLib= -L $(Dependencies)/libpqxx-6.4.5/install/lib -lpqxx -L `pg_config --libdir` -lpq +#PostgresInclude= -I $(Dependencies)/libpqxx-6.4.5/install/include -I `pg_config --includedir` +PostgresLib= -L $(Dependencies)/libpqxx-7.10.4/install/lib -lpqxx -L `pg_config --libdir` -lpq +PostgresInclude= -I $(Dependencies)/libpqxx-7.10.4/install/include -I `pg_config --includedir` + +Includes=-I $(ToolFrameworkCore)/include/ -I $(ToolDAQFramework)/include/ -I $(SOURCEDIR)/include/ $(ZMQInclude) $(BoostInclude) $(PostgresInclude) +ToolLibraries = $(patsubst %, lib/%, $(filter lib%, $(subst /, , $(wildcard UserTools/*/*.so)))) +LIBRARIES=lib/libDataModel.so lib/libMyTools.so $(ToolLibraries) +DataModelHEADERS:=$(patsubst %.h, include/%.h, $(filter %.h, $(subst /, ,$(wildcard DataModel/*.h)))) +MyToolHEADERS:=$(patsubst %.h, include/%.h, $(filter %.h, $(subst /, ,$(wildcard UserTools/*/*.h) $(wildcard UserTools/*.h)))) +ToolLibs = $(patsubst %.so, %, $(patsubst lib%, -l%,$(filter lib%, $(subst /, , $(wildcard UserTools/*/*.so))))) +AlreadyCompiled = $(wildcard UserTools/$(filter-out %.so UserTools , $(subst /, ,$(wildcard UserTools/*/*.so)))/*.cpp) 
+SOURCEFILES:=$(patsubst %.cpp, %.o, $(filter-out $(AlreadyCompiled), $(wildcard src/*.cpp) $(wildcard UserTools/*/*.cpp) $(wildcard DataModel/*.cpp))) +Libs=-L $(SOURCEDIR)/lib/ -lDataModel -L $(ToolDAQFramework)/lib/ -lToolDAQChain -lDAQDataModelBase -lDAQLogging -lServiceDiscovery -lDAQStore -L $(ToolFrameworkCore)/lib/ -lToolChain -lMyTools -lDataModelBase -lLogging -lStore -lpthread $(ToolLibs) -L $(ToolDAQFramework)/lib/ -lToolDAQChain -lDAQDataModelBase -lDAQLogging -lServiceDiscovery -lDAQStore $(ZMQLib) $(BoostLib) $(PostgresLib) -MyToolsInclude = -MyToolsLib = + +#.SECONDARY: $(%.o) + +all: $(DataModelHEADERS) $(MyToolHEADERS) $(SOURCEFILES) $(LIBRARIES) main NodeDaemon RemoteControl debug: all -all: lib/libStore.so lib/libLogging.so lib/libDataModel.so include/Tool.h lib/libMyTools.so lib/libServiceDiscovery.so lib/libToolChain.so main RemoteControl NodeDaemon +main: src/main.o $(LIBRARIES) $(DataModelHEADERS) $(MyToolHEADERS) | $(SOURCEFILES) + @echo -e "\e[38;5;11m\n*************** Making " $@ " ****************\e[0m" + g++ $(CXXFLAGS) $< -o $@ $(Includes) $(Libs) $(DataModelInclude) $(DataModelLib) $(MyToolsInclude) $(MyToolsLib) + +include/%.h: + @echo -e "\e[38;5;87m\n*************** sym linking headers ****************\e[0m" + ln -s `pwd`/$(filter %$(strip $(patsubst include/%.h, /%.h, $@)), $(wildcard DataModel/*.h) $(wildcard UserTools/*/*.h) $(wildcard UserTools/*.h)) $@ -main: src/main.cpp | lib/libMyTools.so lib/libStore.so lib/libLogging.so lib/libToolChain.so lib/libDataModel.so lib/libServiceDiscovery.so - @echo -e "\e[38;5;226m\n*************** Making " $@ "****************\e[0m" - g++ $(CXXFLAGS) src/main.cpp -o main -I include -L lib -lStore -lMyTools -lToolChain -lDataModel -lLogging -lServiceDiscovery -lpthread $(DataModelInclude) $(DataModelLib) $(MyToolsInclude) $(MyToolsLib) $(ZMQLib) $(ZMQInclude) $(BoostLib) $(BoostInclude) +src/%.o : src/%.cpp + @echo -e "\e[38;5;214m\n*************** Making " $@ "****************\e[0m" + g++ 
$(CXXFLAGS) -c $< -o $@ $(Includes) +UserTools/Factory/Factory.o : UserTools/Factory/Factory.cpp $(DataModelHEADERS) $(MyToolHEADERS) + @echo -e "\e[38;5;214m\n*************** Making " $@ "****************\e[0m" + g++ $(CXXFLAGS) -c $< -o $@ $(Includes) $(DataModelInclude) $(ToolsInclude) -lib/libStore.so: $(ToolDAQPath)/ToolDAQFramework/src/Store/* - cd $(ToolDAQPath)/ToolDAQFramework && $(MAKE) lib/libStore.so - @echo -e "\e[38;5;118m\n*************** Copying " $@ "****************\e[0m" - cp $(ToolDAQPath)/ToolDAQFramework/src/Store/*.h include/ - cp $(ToolDAQPath)/ToolDAQFramework/lib/libStore.so lib/ - #g++ -g -O2 -fPIC -shared -I include $(ToolDAQPath)/ToolDAQFramework/src/Store/*.cpp -o lib/libStore.so $(BoostLib) $(BoostInclude) +UserTools/%.o : UserTools/%.cpp $(DataModelHEADERS) UserTools/%.h + @echo -e "\e[38;5;214m\n*************** Making " $@ "****************\e[0m" + g++ $(CXXFLAGS) -c $< -o $@ $(Includes) $(DataModelInclude) $(ToolsInclude) +DataModel/%.o : DataModel/%.cpp DataModel/%.h $(DataModelHEADERS) + @echo -e "\e[38;5;214m\n*************** Making " $@ "****************\e[0m" + g++ $(CXXFLAGS) -c $< -o $@ $(Includes) $(DataModelInclude) -include/Tool.h: $(ToolDAQPath)/ToolDAQFramework/src/Tool/Tool.h - @echo -e "\e[38;5;118m\n*************** Copying " $@ "****************\e[0m" - cp $(ToolDAQPath)/ToolDAQFramework/src/Tool/Tool.h include/ - cp UserTools/*.h include/ - cp UserTools/*/*.h include/ - cp DataModel/*.h include/ +lib/libDataModel.so: $(patsubst %.cpp, %.o , $(wildcard DataModel/*.cpp)) | $(DataModelHEADERS) + @echo -e "\e[38;5;201m\n*************** Making " $@ "****************\e[0m" + g++ $(CXXFLAGS) --shared $^ -o $@ $(Includes) $(DataModelInclude) +lib/libMyTools.so: $(patsubst %.cpp, %.o , $(filter-out $(AlreadyCompiled), $(wildcard UserTools/*/*.cpp))) | $(DataModelHEADERS) $(MyToolHEADERS) + @echo -e "\e[38;5;201m\n*************** Making " $@ "****************\e[0m" + g++ $(CXXFLAGS) --shared $^ -o $@ $(Includes) 
$(DataModelInclude) $(MyToolsInclude) -lib/libToolChain.so: $(ToolDAQPath)/ToolDAQFramework/src/ToolChain/* | lib/libLogging.so lib/libStore.so lib/libMyTools.so lib/libServiceDiscovery.so lib/libLogging.so lib/libDataModel.so - @echo -e "\e[38;5;226m\n*************** Making " $@ "****************\e[0m" - cp $(ToolDAQPath)/ToolDAQFramework/UserTools/Factory/*.h include/ - cp $(ToolDAQPath)/ToolDAQFramework/src/ToolChain/*.h include/ - g++ $(CXXFLAGS) -shared $(ToolDAQPath)/ToolDAQFramework/src/ToolChain/ToolChain.cpp -I include -lpthread -L lib -lStore -lDataModel -lServiceDiscovery -lLogging -lMyTools -o lib/libToolChain.so $(DataModelInclude) $(DataModelLib) $(ZMQLib) $(ZMQInclude) $(MyToolsInclude) $(BoostLib) $(BoostInclude) +lib/%.so: + @echo -e "\e[38;5;87m\n*************** sym linking Tool libs ****************\e[0m" + ln -s `pwd`/$(filter %$(strip $(patsubst lib/%.so, /%.so ,$@)), $(wildcard UserTools/*/*.so)) $@ +NodeDaemon: $(ToolDAQFramework)/NodeDaemon + @echo -e "\e[38;5;87m\n*************** sym linking " $@ " ****************\e[0m" + ln -s $(ToolDAQFramework)/NodeDaemon ./ -clean: +RemoteControl: $(ToolDAQFramework)/RemoteControl + @echo -e "\e[38;5;87m\n*************** sym linking " $@ " ****************\e[0m" + ln -s $(ToolDAQFramework)/RemoteControl ./ + +clean: @echo -e "\e[38;5;201m\n*************** Cleaning up ****************\e[0m" + rm -f */*/*.o + rm -f */*.o rm -f include/*.h rm -f lib/*.so - rm -f main - rm -f RemoteControl - rm -f NodeDaemon - rm -f UserTools/*/*.o - rm -f DataModel/*.o - -lib/libDataModel.so: DataModel/* lib/libLogging.so lib/libStore.so $(patsubst DataModel/%.cpp, DataModel/%.o, $(wildcard DataModel/*.cpp)) - @echo -e "\e[38;5;226m\n*************** Making " $@ "****************\e[0m" - cp DataModel/*.h include/ - #g++ -g -O2 -fPIC -shared DataModel/*.cpp -I include -L lib -lStore -lLogging -o lib/libDataModel.so $(DataModelInclude) $(DataModelLib) $(ZMQLib) $(ZMQInclude) $(BoostLib) $(BoostInclude) - g++ $(CXXFLAGS) 
-shared DataModel/*.o -I include -L lib -lStore -lLogging -o lib/libDataModel.so $(DataModelInclude) $(DataModelLib) $(ZMQLib) $(ZMQInclude) $(BoostLib) $(BoostInclude) - -lib/libMyTools.so: UserTools/*/* UserTools/* include/Tool.h lib/libLogging.so lib/libStore.so $(patsubst UserTools/%.cpp, UserTools/%.o, $(wildcard UserTools/*/*.cpp)) |lib/libDataModel.so - @echo -e "\e[38;5;226m\n*************** Making " $@ "****************\e[0m" - cp UserTools/*/*.h include/ - cp UserTools/*.h include/ - #g++ -g -O2 -fPIC -shared UserTools/Factory/Factory.cpp -I include -L lib -lStore -lDataModel -lLogging -o lib/libMyTools.so $(MyToolsInclude) $(MyToolsLib) $(DataModelInclude) $(DataModelLib) $(ZMQLib) $(ZMQInclude) $(BoostLib) $(BoostInclude) - g++ $(CXXFLAGS) -shared UserTools/*/*.o -I include -L lib -lStore -lDataModel -lLogging -o lib/libMyTools.so $(MyToolsInclude) $(DataModelInclude) $(MyToolsLib) $(ZMQLib) $(ZMQInclude) $(BoostLib) $(BoostInclude) - -RemoteControl: - cd $(ToolDAQPath)/ToolDAQFramework/ && $(MAKE) RemoteControl - @echo -e "\e[38;5;118m\n*************** Copying " $@ "****************\e[0m" - cp $(ToolDAQPath)/ToolDAQFramework/RemoteControl ./ - -NodeDaemon: - cd $(ToolDAQPath)/ToolDAQFramework/ && $(MAKE) NodeDaemon - @echo -e "\e[38;5;226m\n*************** Copying " $@ "****************\e[0m" - cp $(ToolDAQPath)/ToolDAQFramework/NodeDaemon ./ - -lib/libServiceDiscovery.so: $(ToolDAQPath)/ToolDAQFramework/src/ServiceDiscovery/* | lib/libStore.so - cd $(ToolDAQPath)/ToolDAQFramework && $(MAKE) lib/libServiceDiscovery.so - @echo -e "\e[38;5;118m\n*************** Copying " $@ "****************\e[0m" - cp $(ToolDAQPath)/ToolDAQFramework/src/ServiceDiscovery/ServiceDiscovery.h include/ - cp $(ToolDAQPath)/ToolDAQFramework/lib/libServiceDiscovery.so lib/ - #g++ -shared -fPIC -I include $(ToolDAQPath)/ToolDAQFramework/src/ServiceDiscovery/ServiceDiscovery.cpp -o lib/libServiceDiscovery.so -L lib/ -lStore $(ZMQInclude) $(ZMQLib) $(BoostLib) $(BoostInclude) - 
-lib/libLogging.so: $(ToolDAQPath)/ToolDAQFramework/src/Logging/* | lib/libStore.so - cd $(ToolDAQPath)/ToolDAQFramework && $(MAKE) lib/libLogging.so - @echo -e "\e[38;5;118m\n*************** Copying " $@ "****************\e[0m" - cp $(ToolDAQPath)/ToolDAQFramework/src/Logging/Logging.h include/ - cp $(ToolDAQPath)/ToolDAQFramework/lib/libLogging.so lib/ - #g++ -shared -fPIC -I include $(ToolDAQPath)/ToolDAQFramework/src/Logging/Logging.cpp -o lib/libLogging.so -L lib/ -lStore $(ZMQInclude) $(ZMQLib) $(BoostLib) $(BoostInclude) - -update: - @echo -e "\e[38;5;51m\n*************** Updating ****************\e[0m" - cd $(ToolDAQPath)/ToolDAQFramework; git pull - cd $(ToolDAQPath)/zeromq-4.0.7; git pull - git pull - - -UserTools/%.o: UserTools/%.cpp lib/libStore.so include/Tool.h lib/libLogging.so lib/libDataModel.so - @echo -e "\e[38;5;226m\n*************** Making " $@ "****************\e[0m" - cp $(shell dirname $<)/*.h include - -g++ -c $(CXXFLAGS) -o $@ $< -I include -L lib -lStore -lDataModel -lLogging $(MyToolsInclude) $(MyToolsLib) $(DataModelInclude) $(DataModelLib) $(ZMQLib) $(ZMQInclude) $(BoostLib) $(BoostInclude) - -target: remove $(patsubst %.cpp, %.o, $(wildcard UserTools/$(TOOL)/*.cpp)) - -remove: - echo -e "removing" - -rm UserTools/$(TOOL)/*.o - -DataModel/%.o: DataModel/%.cpp lib/libLogging.so lib/libStore.so - @echo -e "\e[38;5;226m\n*************** Making " $@ "****************\e[0m" - cp $(shell dirname $<)/*.h include - -g++ -c $(CXXFLAGS) -o $@ $< -I include -L lib -lStore -lLogging $(DataModelInclude) $(DataModelLib) $(ZMQLib) $(ZMQInclude) $(BoostLib) $(BoostInclude) - + rm -rf main + rm -rf NodeDaemon + rm -rf RemoteControl Docs: doxygen Doxyfile + diff --git a/Setup.sh b/Setup.sh index f520e30..bce1648 100755 --- a/Setup.sh +++ b/Setup.sh @@ -1,11 +1,11 @@ #!/bin/bash +export PS1='${debian_chroot:+($debian_chroot)}\[\033[35;2;1m\]\u@\h\[\033[00m\]:\[\033[00;36m\]\w\[\033[00m\]\$ ' #Application path location of applicaiton - -ToolDAQapp=`pwd` 
+Dependencies=/opt #source ${ToolDAQapp}/ToolDAQ/root/bin/thisroot.sh -export LD_LIBRARY_PATH=`pwd`/lib:${ToolDAQapp}/lib:${ToolDAQapp}/ToolDAQ/zeromq-4.0.7/lib:${ToolDAQapp}/ToolDAQ/boost_1_66_0/install/lib:$LD_LIBRARY_PATH +export LD_LIBRARY_PATH=`pwd`/lib:${Dependencies}/zeromq-4.0.7/lib:${Dependencies}/boost_1_66_0/install/lib:${Dependencies}/libpqxx-7.10.4/install/lib:${Dependencies}/ToolFrameworkCore/lib:${Dependencies}/ToolDAQFramework/lib:$LD_LIBRARY_PATH export SEGFAULT_SIGNALS="all" diff --git a/UserTools/DatabaseWorkers/DatabaseWorkerMonitoring.h b/UserTools/DatabaseWorkers/DatabaseWorkerMonitoring.h new file mode 100644 index 0000000..c6d139f --- /dev/null +++ b/UserTools/DatabaseWorkers/DatabaseWorkerMonitoring.h @@ -0,0 +1,66 @@ +#ifndef DatabaseWorkerMonitoring_H +#define DatabaseWorkerMonitoring_H + +#include "MonitoringVariables.h" + +class DatabaseWorkerMonitoring : public MonitoringVariables { + public: + DatabaseWorkerMonitoring(){}; + ~DatabaseWorkerMonitoring(){}; + + std::atomic logging_submissions; + std::atomic logging_submissions_failed; + std::atomic monitoring_submissions; + std::atomic monitoring_submissions_failed; + std::atomic rootplot_submissions; + std::atomic rootplot_submissions_failed; + std::atomic plotlyplot_submissions; + std::atomic plotlyplot_submissions_failed; + std::atomic alarm_submissions; + std::atomic alarm_submissions_failed; + std::atomic devconfig_submissions; + std::atomic devconfig_submissions_failed; + std::atomic runconfig_submissions; + std::atomic runconfig_submissions_failed; + std::atomic calibration_submissions; + std::atomic calibration_submissions_failed; + std::atomic generic_submissions; + std::atomic generic_submissions_failed; + std::atomic readquery_submissions; + std::atomic readquery_submissions_failed; + std::atomic jobs_completed; + std::atomic jobs_failed; + std::atomic thread_crashes; // restarts of tool worker thread (main thread found reader thread 'running' was false) + + std::string 
toJSON(){ + + std::string s="{\"logging_submissions\":"+std::to_string(logging_submissions.load()) + +",\"logging_submissions_failed\":"+std::to_string(logging_submissions_failed.load()) + +",\"monitoring_submissions\":"+std::to_string(monitoring_submissions.load()) + +",\"monitoring_submissions_failed\":"+std::to_string(monitoring_submissions_failed.load()) + +",\"rootplot_submissions\":"+std::to_string(rootplot_submissions.load()) + +",\"rootplot_submissions_failed\":"+std::to_string(rootplot_submissions_failed.load()) + +",\"plotlyplot_submissions\":"+std::to_string(plotlyplot_submissions.load()) + +",\"plotlyplot_submissions_failed\":"+std::to_string(plotlyplot_submissions_failed.load()) + +",\"alarm_submissions\":"+std::to_string(alarm_submissions.load()) + +",\"alarm_submissions_failed\":"+std::to_string(alarm_submissions_failed.load()) + +",\"devconfig_submissions\":"+std::to_string(devconfig_submissions.load()) + +",\"devconfig_submissions_failed\":"+std::to_string(devconfig_submissions_failed.load()) + +",\"runconfig_submissions\":"+std::to_string(runconfig_submissions.load()) + +",\"runconfig_submissions_failed\":"+std::to_string(runconfig_submissions_failed.load()) + +",\"calibration_submissions\":"+std::to_string(calibration_submissions.load()) + +",\"calibration_submissions_failed\":"+std::to_string(calibration_submissions_failed.load()) + +",\"generic_submissions\":"+std::to_string(generic_submissions.load()) + +",\"generic_submissions_failed\":"+std::to_string(generic_submissions_failed.load()) + +",\"readquery_submissions\":"+std::to_string(readquery_submissions.load()) + +",\"readquery_submissions_failed\":"+std::to_string(readquery_submissions_failed.load()) + +",\"jobs_failed\":"+std::to_string(jobs_failed.load()) + +",\"jobs_completed\":"+std::to_string(jobs_completed.load()) + +",\"thread_crashes\":"+std::to_string(thread_crashes.load()) + +"}"; + + return s; + } +}; + +#endif diff --git a/UserTools/DatabaseWorkers/DatabaseWorkers.cpp 
b/UserTools/DatabaseWorkers/DatabaseWorkers.cpp new file mode 100644 index 0000000..f656d8d --- /dev/null +++ b/UserTools/DatabaseWorkers/DatabaseWorkers.cpp @@ -0,0 +1,800 @@ +#include "DatabaseWorkers.h" +#include +#include +//#include + +DatabaseWorkers::DatabaseWorkers():Tool(){} + +std::string DatabaseWorkers::connection_string=""; + +bool DatabaseWorkers::Initialise(std::string configfile, DataModel &data){ + + InitialiseTool(data); + m_configfile = configfile; + InitialiseConfiguration(configfile); + //m_variables.Print(); + + /* ----------------------------------------- */ + /* Configuration */ + /* ----------------------------------------- */ + + m_verbose=1; + std::string dbhostname = "/tmp"; // '/tmp' = local unix socket + std::string dbhostaddr = ""; // fallback if hostname is empty, an ip address + int dbport = 5432; // database port + std::string dbname = "daq"; // database name + std::string dbuser = ""; // database user to connect as. defaults to PGUSER env var if empty. + std::string dbpasswd = ""; // database password. defaults to PGPASS or PGPASSFILE if not given. + + // on authentication: we may consider using 'ident', which will permit the + // user to connect to the database as the postgres user with name matching + // their OS username, and/or the database user mapped to their username + // with the pg_ident.conf file in postgres database. 
in such a case dbuser and dbpasswd + // should be left empty + + m_variables.Get("verbose",m_verbose); + m_variables.Get("hostname",dbhostname); + m_variables.Get("hostaddr",dbhostaddr); + m_variables.Get("dbname",dbname); + m_variables.Get("port",dbport); + m_variables.Get("user",dbuser); + m_variables.Get("passwd",dbpasswd); + // number of database workers - FIXME needs to match concurrency of postgres backend + max_workers = 10; + m_variables.Get("max_workers", max_workers); + + ExportConfiguration(); + + /* ----------------------------------------- */ + /* Thread Setup */ + /* ----------------------------------------- */ + + // monitoring struct to encapsulate tracking info + std::unique_lock locker(m_data->monitoring_variables_mtx); + m_data->monitoring_variables.emplace(m_tool_name, &monitoring_vars); + + // we *do* need a unique worker pool here because these workers + // maintain a connection to the database, so are a 'limited resource' + job_manager = new WorkerPoolManager(database_jobqueue, &max_workers, &(m_data->thread_cap), &(m_data->num_threads), nullptr, true); + + thread_args.m_data = m_data; + thread_args.monitoring_vars = &monitoring_vars; + thread_args.job_queue = &database_jobqueue; + if(!m_data->utils.CreateThread("database_job_distributor", &Thread, &thread_args)){ + Log("Failed to spawn background thread",v_error,m_verbose); + return false; + } + m_data->num_threads++; + + /* ----------------------------------------- */ + /* DB Test */ + /* ----------------------------------------- */ + + // pass connection details to the postgres interface class + std::stringstream tmp; + if(dbhostname!="") tmp<<" host="< Log + return false; + } + // closes connection here on destruction + } catch (const pqxx::broken_connection &e){ + // as usual the doxygen sucks, but it seems this doesn't provide + // any further methods to obtain information about the failure mode, + // so probably not useful to catch this explicitly. 
+ std::cerr << e.what() << std::endl; // FIXME cerr -> Log + return false; + } + catch (std::exception const &e){ + std::cerr << current_exception_name()<<": "< Log + return false; + } + + return true; +} + + +bool DatabaseWorkers::Execute(){ + + // the main thread is going to lock the datamodel vector of queries + // grab a bunch of entries, and spin off a job for each batch of queries + // (possibly doing this several times to spin off multiple jobs) + + // FIXME ok but actually this kills all our jobs, not just our job distributor + // so we don't want to do that. + if(!thread_args.running){ + Log("Execute found thread not running!",v_error); + Finalise(); + Initialise(m_configfile, *m_data); // FIXME should we give up if Initialise returns false? should we set StopLoop to 1? + ++(monitoring_vars.thread_crashes); + } + + return true; +} + + +bool DatabaseWorkers::Finalise(){ + + // signal job distributor thread to stop + Log("Joining job distributor thread",v_warning); + m_data->utils.KillThread(&thread_args); + Log("Finished",v_warning); + m_data->num_threads--; + + // deleting the worker pool manager will kill all the worker threads + Log("Joining database worker thread pool",v_warning); + delete job_manager; + job_manager = nullptr; + m_data->num_threads--; + + std::unique_lock locker(m_data->monitoring_variables_mtx); + m_data->monitoring_variables.erase(m_tool_name); + + Log("Finished",v_warning); + + return true; +} + +// ««-------------- ≪ °◇◆◇° ≫ --------------»» + +void DatabaseWorkers::Thread(Thread_args* args){ + + DatabaseJobDistributor_args* m_args = dynamic_cast(args); + + // get a new Job to the job queue to process this data + if(m_args->the_job==nullptr){ + m_args->the_job = m_args->m_data->job_pool.GetNew("database_worker"); + m_args->the_job->out_pool = &m_args->m_data->job_pool; + + if(m_args->the_job->data == nullptr){ + // on first creation of the job, make it a JobStruct to encapsulate its data + // N.B. 
Pool::GetNew will only invoke the constructor if this is a new instance, + // (not if it's been used before and then returned to the pool) + // so don't pass job-specific variables to the constructor + m_args->the_job->data = m_args->job_struct_pool.GetNew(&m_args->job_struct_pool, m_args->m_data, m_args->monitoring_vars); + } else { + // FIXME error + std::cerr<<"database_worker Job with non-null data pointer!"<the_job->func = DatabaseJob; + m_args->the_job->fail_func = DatabaseJobFail; + + // FIXME this could leak the_job if the toolchain ends... gonna ignore that, i dunno how to handle it. + } + + DatabaseJobStruct* job_data = static_cast(m_args->the_job->data); + job_data->clear(); + + // XXX ok we have flexibility here on how much we want each worker to grab + // the more we do in one transaction (one job) the better throughput... + // but with possibly greater latency on replies + + // grab logging queries + std::unique_lock locker(m_args->m_data->log_query_queue_mtx); + if(!m_args->m_data->log_query_queue.empty()){ + std::swap(m_args->m_data->log_query_queue, job_data->logging_queue); + //printf("DbJobDistributor grabbed %d log batches\n",job_data->logging_queue.size()); + } + + // grab monitoring queries + locker = std::unique_lock(m_args->m_data->mon_query_queue_mtx); + if(!m_args->m_data->mon_query_queue.empty()){ + std::swap(m_args->m_data->mon_query_queue, job_data->monitoring_queue); + } + + // if rootplot queries go over multicast, grab those + locker = std::unique_lock(m_args->m_data->rootplot_query_queue_mtx); + if(!m_args->m_data->rootplot_query_queue.empty()){ + std::swap(m_args->m_data->rootplot_query_queue, job_data->rootplot_queue); + } + + // if plotlyplot queries go over multicast, grab those + locker = std::unique_lock(m_args->m_data->plotlyplot_query_queue_mtx); + if(!m_args->m_data->plotlyplot_query_queue.empty()){ + std::swap(m_args->m_data->plotlyplot_query_queue, job_data->plotlyplot_queue); + } + + // grab write queries + locker = 
std::unique_lock(m_args->m_data->write_query_queue_mtx); + if(!m_args->m_data->write_query_queue.empty()){ + std::swap(m_args->m_data->write_query_queue, job_data->write_queue); + //printf("DbJobDistributor grabbed %d write query batches\n",job_data->write_queue.size()); + } + + // grab read queries + locker = std::unique_lock(m_args->m_data->read_msg_queue_mtx); + if(!m_args->m_data->read_msg_queue.empty()){ + std::swap(m_args->m_data->read_msg_queue, job_data->read_queue); + //printf("DbJobDistributor grabbed %d read query batches\n",job_data->read_queue.size()); + } + + locker.unlock(); + + // check if the job had something to do + if(job_data->logging_queue.empty() && + job_data->monitoring_queue.empty() && + job_data->rootplot_queue.empty() && + job_data->plotlyplot_queue.empty() && + job_data->write_queue.empty() && + job_data->read_queue.empty()){ + usleep(100); + return; + } + + //printf("DbJobDistributor making db job!\n"); + job_data->m_job_name = "database_worker"; + + m_args->job_queue->AddJob(m_args->the_job); + m_args->the_job = nullptr; + + return; + +} + +// ««-------------- ≪ °◇◆◇° ≫ --------------»» + +void DatabaseWorkers::DatabaseJobFail(void*& arg){ + + // safety check in case the job somehow fails after returning its args to the pool + if(arg==nullptr){ + std::cerr<<"multicast worker fail with no args"< back somewhere for the failures + // to be reported to the clients + //m_args->m_data->query_buffer_pool.Add(m_args->msg_buffer); << FIXME not back to the pool but reply queue + + //query.result.clear(); // to clear/release bad results... + // ideally we want to pass back an error or what happened to the client (set query.err) + //query.err = ??? but what was the problem? 
+ + DatabaseJobStruct* m_args=static_cast(arg); + std::cerr<m_job_name<<" failure"<monitoring_vars->jobs_failed); + + //for(QueryBatch* q : m_args->read_queue) q->push_time("DB_spawn"); + //for(QueryBatch* q : m_args->write_queue) q->push_time("DB_spawn"); + + // return our job args to the pool + m_args->m_pool->Add(m_args); + m_args = nullptr; // clear the local m_args variable... not strictly necessary + arg = nullptr; // clear the job 'data' member variable + + return; +} + +// ««-------------- ≪ °◇◆◇° ≫ --------------»» + +bool DatabaseWorkers::DatabaseJob(void*& arg){ + + DatabaseJobStruct* m_args = static_cast(arg); + //printf("DB worker starting!\n"); + //for(QueryBatch* q : m_args->read_queue) q->push_time("DB_start"); + //for(QueryBatch* q : m_args->write_queue) q->push_time("DB_start"); + + // the worker will need a connection to the database + thread_local std::unique_ptr conn; + if(conn==nullptr){ + conn.reset(new pqxx::connection(DatabaseWorkers::connection_string)); + if(!conn){ + //Log("Failed to open connection to database for worker thread!",v_error); // FIXME logging + // FIXME terminate this worker... m_args->running=false? + return false; + } else { + // set up prepared statements. 
These are, sadly, a property of the connection + // logging insert + conn->prepare("logging_insert", "INSERT INTO logging ( time, device, severity, message ) SELECT * FROM jsonb_to_recordset( $1::jsonb ) as t(time timestamptz, device text, severity int, message text)"); + // monitoring insert + conn->prepare("monitoring_insert", "INSERT INTO monitoring ( time, device, subject, data ) SELECT * FROM jsonb_to_recordset( $1::jsonb ) as t(time timestamptz, device text, subject text, data jsonb)"); + // alarms insert + conn->prepare("alarms_insert", "INSERT INTO alarms ( time, device, level, alarm ) SELECT * FROM jsonb_to_recordset( $1::jsonb ) as t(time timestamptz, device text, level int, alarm text)"); + // rootplot insert + conn->prepare("rootplots_insert", "INSERT INTO rootplots ( time, name, data, draw_options, lifetime ) SELECT * FROM jsonb_to_recordset( $1::jsonb ) as t(time timestamptz, name text, data jsonb, draw_options text, lifetime int) returning version"); + // plotlyplot insert + conn->prepare("plotlyplots_insert", "INSERT INTO plotlyplots ( time, name, data, layout, lifetime ) SELECT * FROM jsonb_to_recordset( $1::jsonb ) as t(time timestamptz, name text, data jsonb, layout jsonb, lifetime int) returning version"); + // calibration insert + conn->prepare("calibration_insert", "INSERT INTO calibration ( time, name, description, data ) SELECT * FROM jsonb_to_recordset( $1::jsonb ) as t(time timestamptz, name text, description text, data jsonb) returning version"); + // device config insert + conn->prepare("device_config_insert", "INSERT INTO device_config ( time, device, author, description, data ) SELECT * FROM jsonb_to_recordset( $1::jsonb ) as t(time timestamptz, device text, author text, description text, data jsonb) returning version"); + // run config insert + conn->prepare("run_config_insert", "INSERT INTO run_config ( time, name, author, description, data ) SELECT * FROM jsonb_to_recordset( $1::jsonb ) as t(time timestamptz, name text, author text, 
description text, data jsonb) returning config_id"); + } + } + + // FIXME if the DB goes down, implement some sort of pausing(?) or local recording to local disk (SQLite?) + + // we use a single transaction for all queries, so open that now + pqxx::work* tx = new pqxx::work(*conn.get()); // aka pqxx::transaction<> + + // start with the read queries. + // since these don't actually modify the database, if any query or the final 'commit' fails, + // any preceding queries should already have their results, so we don't need to re-do them. + + // each batch contains a vector of queries, but unlike inserts, we can't batch these + // as we need the results from each and i'm not sure how we'd tell them apart if we batch submitted. + // for giggles, we'll pipeline them. This may even improve performance. + + // we handle batches serially, rather than inserting all batches at once before pulling everything + // XXX we could consider the latter, if it improved performance - the only drawback is we need to + // re-sumbit all remaining queries each time one errors, which is more overhead the more we submit. + pqxx::pipeline* px = new pqxx::pipeline(*tx); + //printf("processing %d read query batches\n",m_args->read_queue.size()); + for(QueryBatch* batch : m_args->read_queue){ + + //printf("pipelining batch of %d read queries\n",batch->queries.size()); + + // if a query in the pipeline fails, all subsequent queries will also fail + // so we'll need to go back and re-submit them. + // Keep track of where we got to in case we need to do this. + m_args->last_i=0; + + do { + m_args->ids.clear(); + m_args->pipeline_error=false; + + // XXX set the pipeline to retain 1/2 the queries we're going to insert before pushing to backend? 
+ px->retain((batch->queries.size() - m_args->last_i)/2); + + // push the queries to the DB + for(size_t i=m_args->last_i; iqueries.size(); ++i){ + m_args->ids.push_back(px->insert(batch->queries[i].msg())); + } + + // pull the results + for(size_t i=0; iids.size(); ++i){ + ZmqQuery& query = batch->queries[i+m_args->last_i]; + try { + // XXX retrieving a given id blocks until that result is available + // perhaps we could check is_finished(id) and if not, pull other results while we wait + // not sure if this would be faster, but it would certainly be more complex + query.result = px->retrieve(m_args->ids[i]); + ++(m_args->monitoring_vars->readquery_submissions); + } catch (std::exception& e){ + ++(m_args->monitoring_vars->readquery_submissions_failed); + query.result.clear(); + query.err = current_exception_name()+": "+e.what(); // store info about what failed + std::cerr<<"dbworker read query '"<pipeline_error = true; + m_args->last_i += i+1; + + // pipeline::flush docs say "a backend transaction is aborted automatically when an error occurs" + delete tx; + tx = new pqxx::work(*conn.get()); + + // and we need a new pipeline too + delete px; + px = new pqxx::pipeline(*tx); + + break; + } + } + + } while(m_args->pipeline_error); + + // sanity check + if(!px->empty()){ + // pipeline is somehow still not empty even after we should have retrieved everything...?? + std::cerr<<"dbworker pipeline has surplus results?!"<flush(); // cancel pending queries and discard results... i guess?? + } + + } + // ok we're done with the pipeline: close it and detach, whatever that means. 
+ px->complete(); + + //for(QueryBatch* q : m_args->read_queue) q->push_time("DB_done"); + + // might as well pass them out for distribution now + if(!m_args->read_queue.empty()){ + //printf("returning %d read replies to datamodel\n", m_args->read_queue.size()); + std::unique_lock locker(m_args->m_data->query_results_mtx); + m_args->m_data->query_results.insert(m_args->m_data->query_results.end(), + m_args->read_queue.begin(),m_args->read_queue.end()); + } + + // write queries. + // ok, so the problem with this is if any query fails within a transaction, the transaction dies + // and nothing gets committed to the DB - everything up to that point needs re-running. + // we could use: + //pqxx::substransaction sub(tx); + // aka create savepoint and rollback on error. but this may be harmful for performance in insidious ways + + // but we do something different: loop, doing the stuff until it works. + // on successive iterations we skip things we found threw errors the last time. + // in theory we only need two loops.... but errors may be due to transient things, + // and i guess we just need to keep trying until they work? + + m_args->last_i=0; + + do { + + // ok, riskiest bit first: if we fail, fail early so that we have minimal work to re-do. + // user's generic queries - we have not validated any SQL here, so who knows what could happen... + + // for better robustness we could use nontransaction (autocommit) for this bit, but that may be slower... + // alternatively if there's a lot maybe we could use a pipeline, but the overhead may not be worth it... + + // TODO can we code this in a more elegant way? + m_args->last_i = (m_args->endpoint==DatabaseJobStep::generics) ? m_args->endpoint_i : m_args->write_queue.size(); + + for(size_t i=0; ilast_i; ++i){ + QueryBatch* batch = m_args->write_queue[i]; + //printf("executing %d generic queries for next batch\n",batch->generic_query_indices.size()); + size_t last_j = (m_args->endpoint==DatabaseJobStep::generics) ? 
m_args->endpoint_j : batch->generic_query_indices.size(); + for(size_t j=m_args->checkpoint_j; jqueries[batch->generic_query_indices[j]]; + if(!query.err.empty()) continue; // skip queries flagged bad on a previous iteration + try { + query.result = tx->exec(query.msg()); + ++(m_args->monitoring_vars->generic_submissions); + } catch (std::exception& e){ + ++(m_args->monitoring_vars->generic_submissions_failed); + query.result.clear(); + query.err = current_exception_name()+": "+e.what(); + std::cerr<<"dbworker generic query '"<(&e); + //if(sqle) std::cerr<<"SQLSTATE is now "<sqlstate()<checkpoint_i = i+1; + m_args->checkpoint_j = j; + m_args->had_error=true; + delete tx; + tx = new pqxx::work(*conn.get()); + } + } + } + + if(!m_args->had_error){ + if(m_args->endpoint==DatabaseJobStep::logging) goto commitit; + m_args->checkpoint = DatabaseJobStep::logging; + } + + // insert new logging statements + m_args->last_i = (m_args->endpoint==DatabaseJobStep::logging) ? m_args->endpoint_i : m_args->logging_queue.size(); + + //printf("calling prepped for %d logging batches\n",m_args->logging_queue.size()); + for(size_t i=0; ilast_i; ++i){ + if(m_args->bad_logs.count(i)) continue; + std::string* batch = m_args->logging_queue[i]; + //printf("dbworker inserting logging batch: '%s'\n",batch->c_str()); + try { + tx->exec(pqxx::prepped{"logging_insert"}, pqxx::params{*batch}); + ++(m_args->monitoring_vars->logging_submissions); + } catch (std::exception& e){ + std::cerr<<"dbworker log insert failed with "<monitoring_vars->logging_submissions_failed); + // FIXME log the error here + // FIXME if we catch (pqxx::sql_error const &e) or others can we get better information? 
+ // after error the transaction becomes unusable, and we must open a new one + m_args->bad_logs.emplace(i); + m_args->checkpoint_i = i; + m_args->had_error=true; + delete tx; + tx = new pqxx::work(*conn.get()); + } + m_args->m_data->multicast_batch_pool.Add(batch); + } + if(!m_args->had_error){ + if(m_args->endpoint==DatabaseJobStep::monitoring) goto commitit; + m_args->checkpoint = DatabaseJobStep::monitoring; + } + + m_args->last_i = (m_args->endpoint==DatabaseJobStep::monitoring) ? m_args->endpoint_i : m_args->monitoring_queue.size(); + + // insert new monitoring statements + //printf("calling prepped for %d monitoring batches\n",m_args->monitoring_queue.size()); + for(size_t i=0; ilast_i; ++i){ + if(m_args->bad_mons.count(i)) continue; + std::string* batch = m_args->monitoring_queue[i]; + try { + tx->exec(pqxx::prepped{"monitoring_insert"}, pqxx::params{*batch}); + ++(m_args->monitoring_vars->monitoring_submissions); + } catch (std::exception& e){ + ++(m_args->monitoring_vars->monitoring_submissions_failed); + std::cerr<<"dbworker mon insert failed with "<bad_mons.emplace(i); + m_args->checkpoint_i = i; + m_args->had_error=true; + delete tx; + tx = new pqxx::work(*conn.get()); + } + m_args->m_data->multicast_batch_pool.Add(batch); + } + if(!m_args->had_error){ + if(m_args->endpoint==DatabaseJobStep::rootplots) goto commitit; + m_args->checkpoint = DatabaseJobStep::rootplots; + } + + m_args->last_i = (m_args->endpoint==DatabaseJobStep::rootplots) ? 
m_args->endpoint_i : m_args->rootplot_queue.size(); + + // insert new multicast rootplot statements + //printf("calling prepped for %d rootplot batches\n",m_args->rootplot_queue.size()); + for(size_t i=0; ilast_i; ++i){ + if(m_args->bad_rootplots.count(i)) continue; + std::string* batch = m_args->rootplot_queue[i]; + try { + tx->exec(pqxx::prepped{"rootplots_insert"}, pqxx::params{*batch}); + ++(m_args->monitoring_vars->rootplot_submissions); + } catch (std::exception& e){ + ++(m_args->monitoring_vars->rootplot_submissions_failed); + std::cerr<<"dbworker rootplot insert failed with "<bad_rootplots.emplace(i); + m_args->checkpoint_i = i; + m_args->had_error=true; + delete tx; + tx = new pqxx::work(*conn.get()); + } + m_args->m_data->multicast_batch_pool.Add(batch); + } + if(!m_args->had_error){ + if(m_args->endpoint==DatabaseJobStep::plotlyplots) goto commitit; + m_args->checkpoint = DatabaseJobStep::plotlyplots; + } + + m_args->last_i = (m_args->endpoint==DatabaseJobStep::plotlyplots) ? 
m_args->endpoint_i : m_args->plotlyplot_queue.size(); + + // insert new multicast plotlyplot statements + //printf("calling prepped for %d plotlyplot batches\n",m_args->plotlyplot_queue.size()); + for(size_t i=0; ilast_i; ++i){ + if(m_args->bad_plotlyplots.count(i)) continue; + std::string* batch = m_args->plotlyplot_queue[i]; + try { + tx->exec(pqxx::prepped{"plotlyplots_insert"}, pqxx::params{*batch}); + ++(m_args->monitoring_vars->plotlyplot_submissions); + } catch (std::exception& e){ + ++(m_args->monitoring_vars->plotlyplot_submissions_failed); + std::cerr<<"dbworker plotlyplot insert failed with "<bad_plotlyplots.emplace(i); + m_args->checkpoint_i = i; + m_args->had_error=true; + delete tx; + tx = new pqxx::work(*conn.get()); + } + m_args->m_data->multicast_batch_pool.Add(batch); + } + if(!m_args->had_error){ + if(m_args->endpoint==DatabaseJobStep::writes) goto commitit; + m_args->checkpoint = DatabaseJobStep::writes; + } + + m_args->last_i = (m_args->endpoint==DatabaseJobStep::writes) ? 
m_args->endpoint_i : m_args->write_queue.size(); + + // write queries + //printf("processing %d write batches\n",m_args->write_queue.size()); + for(size_t i=0; ilast_i; ++i){ + QueryBatch* batch = m_args->write_queue[i]; + // the batch gets split up by WriteWorkers into a buffer for each type of write query + + // alarm insertions return nothing, just catch errors + if(batch->got_alarms() && batch->alarm_batch_err.empty()){ + //printf("calling prepped for alarm buffer '%s'\n",batch->alarm_buffer.c_str()); + try { + tx->exec(pqxx::prepped{"alarms_insert"}, pqxx::params{batch->alarm_buffer}); + ++(m_args->monitoring_vars->alarm_submissions); + } catch (std::exception& e){ + batch->alarm_batch_err = current_exception_name()+": "+e.what(); + ++(m_args->monitoring_vars->alarm_submissions_failed); + std::cerr<<"dbworker alarm batch '"<alarm_buffer<<"' insert failed with "<checkpoint_i = i+1; + m_args->had_error=true; + delete tx; + tx = new pqxx::work(*conn.get()); + } + } + + // the remaining insertions return the new version number + // `pqxx::transaction_base::for_query` runs a query and invokes a callable for each result row + // we use this to collect the returned version numbers into a vector + // N.B. `pqxx::transaction_base::for_stream` is an alternative that is faster for large results + // but slower for small results. TODO check whether ours count as 'large' .. probably not. 
+ + // device config insertions + if(batch->got_devconfigs() && batch->devconfig_batch_err.empty()){ + //printf("calling prepped for dev_config buffer '%s'\n",batch->devconfig_buffer.c_str()); + try { + tx->for_query(pqxx::prepped{"device_config_insert"}, + [&batch](uint16_t new_version_num){ + batch->devconfig_version_nums.push_back(new_version_num); + }, pqxx::params{batch->devconfig_buffer}); + ++(m_args->monitoring_vars->devconfig_submissions); + } catch (std::exception& e){ + ++(m_args->monitoring_vars->devconfig_submissions_failed); + batch->devconfig_batch_err = current_exception_name()+": "+e.what(); + std::cerr<<"dbworker devconfig insert '"<devconfig_buffer<<"' failed with "<checkpoint_i = i+1; + m_args->had_error=true; + delete tx; + tx = new pqxx::work(*conn.get()); + } + } + + // run config insertions + if(batch->got_runconfigs() && batch->runconfig_batch_err.empty()){ + //printf("calling prepped for run_config buffer '%s'\n",batch->runconfig_buffer.c_str()); + try { + tx->for_query(pqxx::prepped{"run_config_insert"}, + [&batch](uint16_t new_version_num){ + batch->runconfig_version_nums.push_back(new_version_num); + }, pqxx::params{batch->runconfig_buffer}); + ++(m_args->monitoring_vars->runconfig_submissions); + } catch (std::exception& e){ + ++(m_args->monitoring_vars->runconfig_submissions_failed); + batch->runconfig_batch_err = current_exception_name()+": "+e.what(); + std::cerr<<"dbworker runconfig insert '"<runconfig_buffer<<"' failed with "<checkpoint_i = i+1; + m_args->had_error=true; + delete tx; + tx = new pqxx::work(*conn.get()); + } + } + + // calibration data insertions + if(batch->got_calibrations() && batch->calibration_batch_err.empty()){ + //printf("calling prepped for calibration buffer '%s'\n",batch->calibration_buffer.c_str()); + try { + tx->for_query(pqxx::prepped{"calibration_insert"}, + [&batch](uint16_t new_version_num){ + batch->calibration_version_nums.push_back(new_version_num); + }, pqxx::params{batch->calibration_buffer}); 
+ ++(m_args->monitoring_vars->calibration_submissions); + } catch (std::exception& e){ + ++(m_args->monitoring_vars->calibration_submissions_failed); + batch->calibration_batch_err = current_exception_name()+": "+e.what(); + std::cerr<<"dbworker calibration insert '"<calibration_buffer<<"' failed with "<checkpoint_i = i+1; + m_args->had_error=true; + delete tx; + tx = new pqxx::work(*conn.get()); + } + } + + // rootplot insertions + if(batch->got_rootplots() && batch->rootplot_batch_err.empty()){ + //printf("calling prepped for rootplots buffer '%s'\n",batch->rootplot_buffer.c_str()); + try { + tx->for_query(pqxx::prepped{"rootplots_insert"}, + [&batch](uint16_t new_version_num){ + batch->rootplot_version_nums.push_back(new_version_num); + }, pqxx::params{batch->rootplot_buffer}); + ++(m_args->monitoring_vars->rootplot_submissions); + } catch (std::exception& e){ + ++(m_args->monitoring_vars->rootplot_submissions_failed); + batch->rootplot_batch_err = current_exception_name()+": "+e.what(); + std::cerr<<"dbworker rootplot insert '"<rootplot_buffer<<"' failed with "<checkpoint_i = i+1; + m_args->had_error=true; + delete tx; + tx = new pqxx::work(*conn.get()); + } + } + + // plotlyplot insertions + if(batch->got_plotlyplots() && batch->plotlyplot_batch_err.empty()){ + //printf("calling prepped for plotlyplots buffer '%s'\n",batch->plotlyplot_buffer.c_str()); + try { + tx->for_query(pqxx::prepped{"plotlyplots_insert"}, + [&batch](uint16_t new_version_num){ + batch->plotlyplot_version_nums.push_back(new_version_num); + }, pqxx::params{batch->plotlyplot_buffer}); + ++(m_args->monitoring_vars->plotlyplot_submissions); + } catch (std::exception& e){ + ++(m_args->monitoring_vars->plotlyplot_submissions_failed); + batch->plotlyplot_batch_err = current_exception_name()+": "+e.what(); + std::cerr<<"dbworker plotlyplot insert '"<plotlyplot_buffer<<"' failed with "<checkpoint_i = i+1; + m_args->had_error=true; + delete tx; + tx = new pqxx::work(*conn.get()); + } + } + + } + + 
// commit the work we've done + commitit: + try { + tx->commit(); + + m_args->endpoint = m_args->checkpoint; + m_args->endpoint_i = m_args->checkpoint_i; + m_args->endpoint_j = m_args->checkpoint_j; + + } catch(pqxx::in_doubt_error& e){ + // ughhhhhhh.... + // basically this means the transaction may have commited or not, pqxx is not sure. + // it's up to us to figure that out, perhaps by querying for the last inserted record + // FIXME for now, we leave that as a problem for another day... + std::cerr<<"dbworker caught "<had_error = true; + + } + + // if we had no errors, we're done. + if(!m_args->had_error) break; + + // if something errored, the the pqxx::transaction will have aborted + // and all insertions to the database before that point (the checkpoint) will have been lost. + // so loop back to the start and re-run up to the point of last error (endpoint) + // this time skipping bad queries to hopefully avoid any errors + //printf("%s encountered error, re-running up to checkpoint %d\n",m_args->m_job_name, m_args->endpoint); + m_args->had_error=false; + + } while(true); // keep trying until we've submitted everything we can. + // FIXME maybe we should add a limiter to stop one job running forever? + // FIXME we probably need better separation of error types for this + // FIXME at some point we want to also fall back to dumping to local disk if DB is inaccessible + // N.B. that will probably result in duplicates in the on-disk version if we don't record what + // committed succesfully, but that's probably easier to handle when uploading the file to DB + // e.g. 
with 'ON CONFLICT' or somesuch + + //for(QueryBatch* q : m_args->write_queue) q->push_time("DB_done"); + + // pass the batch onto the next stage of the pipeline for the DatabaseWorkers + if(!m_args->write_queue.empty()){ + //printf("returning %d write acknowledgements to datamodel\n", m_args->write_queue.size()); + std::unique_lock locker(m_args->m_data->query_results_mtx); + m_args->m_data->query_results.insert(m_args->m_data->query_results.end(), + m_args->write_queue.begin(),m_args->write_queue.end()); + } + + //printf("%s completed\n",m_args->m_job_name.c_str()); + ++(m_args->monitoring_vars->jobs_completed); + + // return our job args to the pool + m_args->m_pool->Add(m_args); // return our job args to the job args struct pool + m_args = nullptr; // clear the local m_args variable... not strictly necessary + arg = nullptr; // clear the job 'data' member variable + + + return true; +} + + diff --git a/UserTools/DatabaseWorkers/DatabaseWorkers.h b/UserTools/DatabaseWorkers/DatabaseWorkers.h new file mode 100644 index 0000000..043c602 --- /dev/null +++ b/UserTools/DatabaseWorkers/DatabaseWorkers.h @@ -0,0 +1,110 @@ +#ifndef DatabaseWorkers_H +#define DatabaseWorkers_H + +#include +#include + +#include "Tool.h" +#include "DataModel.h" +#include "WorkerPoolManager.h" +#include "DatabaseWorkerMonitoring.h" + +/** +* \class DatabaseWorkers +* +* This Tool manages a pool of workers, each with a connection to the backend database, to run the queries. +* +* $Author: M. 
O'Flaherty $ +* $Date: 2025/12/08 $ +* Contact: marcus.o-flaherty@warwick.ac.uk +*/ + +enum class DatabaseJobStep { generics, logging, monitoring, rootplots, plotlyplots, writes, finish }; + +struct DatabaseJobStruct { + + DatabaseJobStruct(Pool* pool, DataModel* data, DatabaseWorkerMonitoring* mon) : m_pool(pool), m_data(data), monitoring_vars(mon){}; + DataModel* m_data; + DatabaseWorkerMonitoring* monitoring_vars; + Pool* m_pool; + std::string m_job_name; + + std::vector read_queue; + std::vector write_queue; + std::vector logging_queue; + std::vector monitoring_queue; + std::vector rootplot_queue; + std::vector plotlyplot_queue; + + std::set bad_logs; + std::set bad_mons; + std::set bad_rootplots; + std::set bad_plotlyplots; + + uint16_t last_i; + + std::vector ids; + bool pipeline_error; + + bool had_error; + DatabaseJobStep checkpoint; + DatabaseJobStep endpoint; + size_t checkpoint_i; + size_t checkpoint_j; + size_t endpoint_i; + size_t endpoint_j; + + void clear(){ + read_queue.clear(); + write_queue.clear(); + logging_queue.clear(); + monitoring_queue.clear(); + rootplot_queue.clear(); + plotlyplot_queue.clear(); + + bad_logs.clear(); + bad_mons.clear(); + bad_rootplots.clear(); + bad_plotlyplots.clear(); + + had_error=false; + endpoint=DatabaseJobStep::finish; + + } + +}; + +struct DatabaseJobDistributor_args : Thread_args { + DataModel* m_data; + DatabaseWorkerMonitoring* monitoring_vars; + Pool job_struct_pool; + JobQueue* job_queue; + Job* the_job = nullptr; + +}; + +class DatabaseWorkers: public Tool { + + public: + DatabaseWorkers(); ///< Simple constructor + bool Initialise(std::string configfile,DataModel &data); ///< Initialise Function for setting up Tool resorces. @param configfile The path and name of the dynamic configuration file to read in. @param data A reference to the transient data class used to pass information between Tools. + bool Execute(); ///< Execute function used to perform Tool purpose. 
+ bool Finalise(); ///< Finalise function used to clean up resources. + + private: + static void Thread(Thread_args* args); + DatabaseJobDistributor_args thread_args; + DatabaseWorkerMonitoring monitoring_vars; + + WorkerPoolManager* job_manager=nullptr; ///< manager for worker farm, has internal background thread that spawns new jobs and or prunes them, along with tracking statistics + JobQueue database_jobqueue; ///< job queue for worker farm + + unsigned int max_workers; // for some reason workerpoolmanager only takes a pointer to this, not a copy + static std::string connection_string; + + static bool DatabaseJob(void*& arg); + static void DatabaseJobFail(void*& args); + +}; + +#endif diff --git a/UserTools/DatabaseWorkers/README.md b/UserTools/DatabaseWorkers/README.md new file mode 100644 index 0000000..a737d86 --- /dev/null +++ b/UserTools/DatabaseWorkers/README.md @@ -0,0 +1,19 @@ +# DatabaseWorkers + +DatabaseWorkers + +## Data + +Describe any data formats DatabaseWorkers creates, destroys, changes, analyzes, or its usage. + + + + +## Configuration + +Describe any configuration variables for DatabaseWorkers. 
+ +``` +param1 value1 +param2 value2 +``` diff --git a/UserTools/DummyTool/DummyTool.cpp b/UserTools/DummyTool/DummyTool.cpp index b99ff88..b7ccdee 100644 --- a/UserTools/DummyTool/DummyTool.cpp +++ b/UserTools/DummyTool/DummyTool.cpp @@ -5,37 +5,68 @@ DummyTool::DummyTool():Tool(){} bool DummyTool::Initialise(std::string configfile, DataModel &data){ - if(configfile!="") m_variables.Initialise(configfile); - //m_variables.Print(); + InitialiseTool(data); + InitialiseConfiguration(configfile); - m_data= &data; - m_log= m_data->Log; + //m_variables.Print(); - if(!m_variables.Get("verbose",m_verbose)) m_verbose=1; + + if(!m_variables.Get("verbose",m_verbose)) m_verbose=1; - Log("test 1",1,m_verbose); + Log("test 1",1); + + ExportConfiguration(); return true; } bool DummyTool::Execute(){ + + // example of print out methods + // mesage level indicates the minimum verbosity level to print out a message + // Therefore a message level of 0 is always printed so should be used for high priority messages e.g. errors + // and a message level or 9 would be for minor messgaes rarely printed - // Dummy test of various printout sytles and techniques + Log("test 2a"); // defualt log function message level is 0. + //Note: calls to the Log function are thread safe. 
+ //Note: tool name is appended to log message ustomatically - Log("test 2",1,m_verbose); - Log("test 3",1,5); - *m_log< #include "Tool.h" +#include "DataModel.h" /** * \class DummyTool @@ -13,8 +14,8 @@ * * $Author: B.Richards $ * $Date: 2019/05/28 10:44:00 $ -* Contact: b.richards@qmul.ac.uk */ + class DummyTool: public Tool { @@ -28,6 +29,7 @@ class DummyTool: public Tool { private: + }; diff --git a/UserTools/Factory/Factory.cpp b/UserTools/Factory/Factory.cpp index eb8e90d..a015d63 100644 --- a/UserTools/Factory/Factory.cpp +++ b/UserTools/Factory/Factory.cpp @@ -5,5 +5,18 @@ Tool* ret=0; // if (tool=="Type") tool=new Type; if (tool=="DummyTool") ret=new DummyTool; +if (tool=="MulticastReceiverSender") ret=new MulticastReceiverSender; +if (tool=="MulticastWorkers") ret=new MulticastWorkers; +if (tool=="DatabaseWorkers") ret=new DatabaseWorkers; +if (tool=="WriteQueryReceiver") ret=new WriteQueryReceiver; +if (tool=="ReadQueryReceiverReplySender") ret=new ReadQueryReceiverReplySender; +if (tool=="WriteWorkers") ret=new WriteWorkers; +if (tool=="Monitoring") ret=new Monitoring; +if (tool=="SocketManager") ret=new SocketManager; +if (tool=="ResultWorkers") ret=new ResultWorkers; +if (tool=="JobManager") ret=new JobManager; +//if (tool=="QueueTrimmer") ret=new QueueTrimmer; +//if (tool=="MiddlemanNegotiate") ret=new MiddlemanNegotiate; + if (tool=="Sleep") ret=new Sleep; return ret; } diff --git a/UserTools/JobManager/JobManager.cpp b/UserTools/JobManager/JobManager.cpp new file mode 100644 index 0000000..df81bd8 --- /dev/null +++ b/UserTools/JobManager/JobManager.cpp @@ -0,0 +1,70 @@ +#include "JobManager.h" + +JobManager::JobManager():Tool(){} + + +bool JobManager::Initialise(std::string configfile, DataModel &data){ + + InitialiseTool(data); + m_configfile = configfile; + InitialiseConfiguration(configfile); + //m_variables.Print(); + + // FIXME add to other Tools + LoadConfig(); + + m_data->num_threads=0; // tracker + worker_pool_manager= new 
WorkerPoolManager(m_data->job_queue, &m_thread_cap, &(m_data->thread_cap), &(m_data->num_threads), nullptr, self_serving); + + ExportConfiguration(); + + return true; +} + + +bool JobManager::Execute(){ + + // TODO add this to other Tools? + if(m_data->change_config){ + InitialiseConfiguration(m_configfile); + LoadConfig(); + ExportConfiguration(); + } + + /* TODO + m_data->monitoring_store_mtx.lock(); + m_data->monitoring_store.Set("pool_threads",worker_pool_manager->NumThreads()); + m_data->monitoring_store.Set("queued_jobs",m_data->job_queue.size()); + m_data->monitoring_store_mtx.unlock(); + // printf("jobmanager q:t = %d:%d\n", m_data->job_queue.size(), worker_pool_manager->NumThreads()); + usleep(1000); + sleep(5); + worker_pool_manager->PrintStats(); + printf("buffersize %u\n", m_data->aggrigation_buffer.size()); + if(worker_pool_manager->NumThreads()==m_thread_cap) m_data->services->SendLog("Warning: Worker Pool Threads Maxed" , 0); //make this a warning + std::cout<<"globalThreads="<num_threads<num_threads--; + + return true; +} + + +// FIXME add to other Tools +void JobManager::LoadConfig(){ + if(!m_variables.Get("verbose",m_verbose)) m_verbose=1; + if(!m_variables.Get("thread_cap",m_thread_cap)) m_thread_cap = double(std::thread::hardware_concurrency())*0.8; + if(!m_variables.Get("global_thread_cap",m_data->thread_cap)) m_data->thread_cap = m_thread_cap; + if(!m_variables.Get("self_serving", self_serving)) self_serving = true; + return; +} + diff --git a/UserTools/JobManager/JobManager.h b/UserTools/JobManager/JobManager.h new file mode 100644 index 0000000..34ab539 --- /dev/null +++ b/UserTools/JobManager/JobManager.h @@ -0,0 +1,39 @@ +#ifndef JobManager_H +#define JobManager_H + +#include + +#include "Tool.h" +#include "DataModel.h" +#include "WorkerPoolManager.h" + +/** +* \class JobManager +* +* This Tool instantiates a WorkerPoolManager to manage the numer of worker threads for processing multicast messages, write queries and responses. 
+* +* $Author: Marcus O'Flaherty $ +* $Date: 2025/12/10 $ +* Contact: marcus.o-flaherty@warwick.ac.uk +*/ + +class JobManager: public Tool { + + public: + JobManager(); ///< Simple constructor + bool Initialise(std::string configfile,DataModel &data); ///< Initialise Function for setting up Tool resorces. @param configfile The path and name of the dynamic configuration file to read in. @param data A reference to the transient data class used to pass information between Tools. + bool Execute(); ///< Execute function used to purform Tool purpose. + bool Finalise(); ///< Finalise funciton used to clean up resources. + + private: + bool self_serving; + unsigned int m_thread_cap; + WorkerPoolManager* worker_pool_manager; + + std::string m_configfile; + void LoadConfig(); + +}; + + +#endif diff --git a/UserTools/JobManager/README.md b/UserTools/JobManager/README.md new file mode 100644 index 0000000..2aca3dd --- /dev/null +++ b/UserTools/JobManager/README.md @@ -0,0 +1,19 @@ +# JobManager + +JobManager + +## Data + +Describe any data formats JobManager creates, destroys, changes, analyzes, or its usage. + + + + +## Configuration + +Describe any configuration variables for JobManager. 
+ +``` +param1 value1 +param2 value2 +``` diff --git a/UserTools/Monitoring/Monitoring.cpp b/UserTools/Monitoring/Monitoring.cpp new file mode 100644 index 0000000..4118018 --- /dev/null +++ b/UserTools/Monitoring/Monitoring.cpp @@ -0,0 +1,267 @@ +#include "Monitoring.h" + +Monitoring::Monitoring():Tool(){} + + +bool Monitoring::Initialise(std::string configfile, DataModel &data){ + + InitialiseTool(data); + m_configfile = configfile; + InitialiseConfiguration(configfile); + //m_variables.Print(); + + if(!m_variables.Get("verbose",m_verbose)) m_verbose=1; + + // how often to write out monitoring stats + int monitoring_period_ms = 60000; + m_variables.Get("monitoring_period_ms",monitoring_period_ms); + + ExportConfiguration(); + + std::unique_lock locker(m_data->monitoring_variables_mtx); + m_data->monitoring_variables.emplace(m_tool_name, &monitoring_vars); + + thread_args.monitoring_period_ms = std::chrono::milliseconds{monitoring_period_ms}; + thread_args.last_send = std::chrono::steady_clock::now(); + thread_args.m_data = m_data; + thread_args.monitoring_vars = &monitoring_vars; + thread_mtx.lock(); + thread_args.thread_mtx = &thread_mtx; + if(!m_data->utils.CreateThread("monitoring", &Thread, &thread_args)){ + Log("Failed to spawn background thread",v_error,m_verbose); + return false; + } + m_data->num_threads++; + + //m_data->services->AddService("middleman", 5000); // is this needed? what for?? + + return true; +} + + +bool Monitoring::Execute(){ + + if(!thread_args.running){ + Log("Execute found thread not running!",v_error); + Finalise(); + Initialise(m_configfile, *m_data); // FIXME should we give up if Initialise returns false? should we set StopLoop to 1? + // FIXME if restarts > X times in last Y mins, alarm (bypass, shove into DB? send to websocket?) and StopLoop. 
+ ++(monitoring_vars.thread_crashes); + } + + return true; +} + + +bool Monitoring::Finalise(){ + + // signal job distributor thread to stop + Log("Joining monitoring thread",v_warning); + thread_args.running=false; + thread_mtx.unlock(); + m_data->utils.KillThread(&thread_args); + Log("thread joined",v_warning); + m_data->num_threads--; + + std::unique_lock locker(m_data->monitoring_variables_mtx); + m_data->monitoring_variables.erase(m_tool_name); + + Log("Finished",v_warning); + return true; +} + +// ««-------------- ≪ °◇◆◇° ≫ --------------»» + +void Monitoring::Thread(Thread_args* args){ + + Monitoring_args* m_args = dynamic_cast(args); + + m_args->last_send = std::chrono::steady_clock::now(); + printf("Monitoring sending stats\n"); + + std::unique_lock locker(m_args->m_data->monitoring_variables_mtx); + + for(std::pair& mon : m_args->m_data->monitoring_variables){ + + std::string s="{\"topic\":\"Monitoring\", \"time\":\"now()\", \"device\":\"middleman\",\"subject\":\""+mon.first+"\", \"data\":"+mon.second->GetJSON()+"}"; + + // use multicast so it also not only goes to DB but also shows up on web services + std::unique_lock locker2(m_args->m_data->out_mon_msg_queue_mtx); + m_args->m_data->out_mon_msg_queue.push_back(s); + + } + + locker.unlock(); + + /* + // FIXME calculate rates and stuff, expand monitoring in Tools + // to calculate rates we need to know the difference in number + // of reads/writes since last time. 
So get the last values + unsigned long last_write_query_count; + unsigned long last_read_query_count; + unsigned long last_log_count; + unsigned long last_mon_count; + MonitoringStore.Get("write_queries_recvd", last_write_query_count); + MonitoringStore.Get("read_queries_recvd", last_read_query_count); + MonitoringStore.Get("logs_recvd", last_log_count); + MonitoringStore.Get("mons_recvd", last_mon_count); + + // calculate message rates + elapsed_time = boost::posix_time::microsec_clock::universal_time() - last_stats_calc; + + float read_query_rate = (elapsed_time.total_seconds()==0) ? 0 : + ((read_queries_recvd - last_read_query_count) * 60.) / elapsed_time.total_seconds(); + float write_query_rate = (elapsed_time.total_seconds()==0) ? 0 : + ((write_queries_recvd - last_write_query_count) * 60.) / elapsed_time.total_seconds(); + float log_rate = (elapsed_time.total_seconds()==0) ? 0 : + ((logs_recvd - last_log_count) * 60.) / elapsed_time.total_seconds(); + float mon_rate = (elapsed_time.total_seconds()==0) ? 0 : + ((mons_recvd - last_mon_count) * 60.) / elapsed_time.total_seconds(); + + // dump all stats into a Store. 
+ MonitoringStore.Set("min_loop_time",min_loop_ms); + MonitoringStore.Set("max_loop_time",max_loop_ms); + MonitoringStore.Set("loops",loops); + MonitoringStore.Set("loop_rate [Hz]",loops/elapsed_time.total_seconds()); + MonitoringStore.Set("write_queries_waiting",wrt_txn_queue.size()); + MonitoringStore.Set("read_queries_waiting",rd_txn_queue.size()); + MonitoringStore.Set("replies_waiting",resp_queue.size()); + MonitoringStore.Set("incoming_logs_waiting",in_log_queue.size()); + MonitoringStore.Set("incoming_mons_waiting",in_mon_queue.size()); + MonitoringStore.Set("out_multicasts_waiting",out_multicast_queue.size()); + MonitoringStore.Set("cached_queries",cache.size()); + MonitoringStore.Set("mm_broadcasts_recvd", mm_broadcasts_recvd); + MonitoringStore.Set("mm_broadcast_recv_fails", mm_broadcast_recv_fails); + MonitoringStore.Set("mm_broadcasts_sent", mm_broadcasts_sent); + MonitoringStore.Set("mm_broadcasts_failed", mm_broadcasts_failed); + MonitoringStore.Set("master_clashes", master_clashes); + MonitoringStore.Set("master_clashes_failed", master_clashes_failed); + MonitoringStore.Set("standby_clashes", standby_clashes); + MonitoringStore.Set("standby_clashes_failed", standby_clashes_failed); + MonitoringStore.Set("self_promotions", self_promotions); + MonitoringStore.Set("self_promotions_failed", self_promotions_failed); + MonitoringStore.Set("promotions", promotions); + MonitoringStore.Set("promotions_failed", promotions_failed); + MonitoringStore.Set("demotions", demotions); + MonitoringStore.Set("demotions_failed", demotions_failed); + MonitoringStore.Set("dropped_writes", dropped_writes); + MonitoringStore.Set("dropped_reads", dropped_reads); + MonitoringStore.Set("dropped_resps", dropped_resps); + MonitoringStore.Set("dropped_log_in", dropped_log_in); + MonitoringStore.Set("dropped_mon_in", dropped_mon_in); + MonitoringStore.Set("dropped_logs_out", dropped_logs_out); + MonitoringStore.Set("dropped_monitoring_out", dropped_monitoring_out); + 
MonitoringStore.Set("read_query_rate", read_query_rate); + MonitoringStore.Set("write_query_rate", write_query_rate); + + // convert Store into a json + std::string json_stats; + MonitoringStore >> json_stats; + + // update the web page status + // actually, this only supports a single word, with no spaces? + std::stringstream status; + status << " read qrys (rcvd/rcv errs/qry errs):["<SetValue(status.str()); + +// // temporarily bypass the database logging level to ensure it gets sent to the monitoring db. +// int db_verbosity_tmp = db_verbosity; +// db_verbosity = 10; +// Log(Concat("Monitoring Stats:",json_stats),15); +// db_verbosity = db_verbosity_tmp; + + //std::string sql_qry = "INSERT INTO monitoring ( time, device, subject, data ) VALUES ( 'now()', '" + // + my_id+"','stats','"+json_stats+"' );"; + + std::string multicast_msg = "{ \"topic\":\"monitoring\"" + ", \"subject\":\"stats\"" + ", \"device\":\""+escape_json(my_id)+"\"" + + ", \"time\":"+std::to_string(time(nullptr)*1000) // ms since unix epoch + + ", \"data\":\""+json_stats+"\" }"; + + if(am_master){ + in_mon_queue_mtx.lock(); + in_mon_queue.push_back(multicast_msg); + in_mon_queue_mtx.unlock(); + } else { + out_multicast_queue.push_back(multicast_msg); + } + + min_loop_ms=9999999; + max_loop_ms=0; + loops=0; + + */ + + //std::this_thread::sleep_until(m_args->last_send+m_args->monitoring_period_ms); + // interruptible sleep - breaks early if Tool unlocks thread_mtx + std::unique_lock timed_locker(*m_args->thread_mtx, std::defer_lock); + timed_locker.try_lock_until(m_args->last_send+m_args->monitoring_period_ms); + + return; +} + +// ««-------------- ≪ °◇◆◇° ≫ --------------»» + +bool Monitoring::ResetStats(bool reset){ +/* + if(!reset) return true; + + min_loop_ms=0; + max_loop_ms=0; + loops=0; + write_queries_recvd=0; + write_query_recv_fails=0; + read_queries_recvd=0; + read_query_recv_fails=0; + logs_recvd=0; + mons_recvd=0; + log_recv_fails=0; + mon_recv_fails=0; + mm_broadcasts_recvd=0; + 
mm_broadcast_recv_fails=0; + write_queries_failed=0; + log_queries_failed=0; + mon_queries_failed=0; + read_queries_failed=0; + reps_sent=0; + rep_send_fails=0; + multicasts_sent=0; + multicast_send_fails=0; + mm_broadcasts_sent=0; + mm_broadcasts_failed=0; + master_clashes=0; + master_clashes_failed=0; + standby_clashes=0; + standby_clashes_failed=0; + self_promotions=0; + self_promotions_failed=0; + promotions=0; + promotions_failed=0; + demotions=0; + demotions_failed=0; + dropped_writes=0; + dropped_reads=0; + dropped_resps=0; + dropped_log_in=0; + dropped_mon_in=0; + dropped_logs_out=0; + dropped_monitoring_out=0; + + MonitoringStore.Set("write_queries_recvd", 0); + MonitoringStore.Set("read_queries_recvd", 0); + + last_stats_calc = boost::posix_time::microsec_clock::universal_time(); + std::string timestring; + TimeStringFromUnixSec(0, timestring); + SC_vars["ResetStats"]->SetValue(false); +*/ + + return true; +} diff --git a/UserTools/Monitoring/Monitoring.h b/UserTools/Monitoring/Monitoring.h new file mode 100644 index 0000000..b480b4e --- /dev/null +++ b/UserTools/Monitoring/Monitoring.h @@ -0,0 +1,54 @@ +#ifndef Monitoring_H +#define Monitoring_H + +#include +#include +#include +#include +#include + +#include "Tool.h" +#include "DataModel.h" +#include "MonitoringMonitoring.h" + +/** +* \class Monitoring +* +* This Tool sends out statistics to assist with performance monitoring and debugging +* +* $Author: Marcus O'Flaherty $ +* $Date: 2025/12/11 $ +* Contact: marcus.o-flaherty@warwick.ac.uk +*/ + +struct Monitoring_args : public Thread_args { + + DataModel* m_data; + MonitoringMonitoring* monitoring_vars; + std::chrono::time_point last_send; + std::chrono::milliseconds monitoring_period_ms; + std::stringstream ss; + std::timed_mutex* thread_mtx; + +}; + +class Monitoring: public Tool { + public: + Monitoring(); ///< Simple constructor + bool Initialise(std::string configfile,DataModel &data); ///< Initialise Function for setting up Tool resorces. 
@param configfile The path and name of the dynamic configuration file to read in. @param data A reference to the transient data class used to pass information between Tools. + bool Execute(); ///< Execute function used to perform Tool purpose. + bool Finalise(); ///< Finalise function used to clean up resources. + + private: + static void Thread(Thread_args* args); + Monitoring_args thread_args; + MonitoringMonitoring monitoring_vars; + + std::timed_mutex thread_mtx; + + static bool ResetStats(bool reset); + +}; + + +#endif diff --git a/UserTools/Monitoring/MonitoringMonitoring.h b/UserTools/Monitoring/MonitoringMonitoring.h new file mode 100644 index 0000000..b820a13 --- /dev/null +++ b/UserTools/Monitoring/MonitoringMonitoring.h @@ -0,0 +1,23 @@ +#ifndef MonitoringMonitoring_H +#define MonitoringMonitoring_H + +#include "MonitoringVariables.h" + +class MonitoringMonitoring : public MonitoringVariables { + public: + MonitoringMonitoring(){}; + ~MonitoringMonitoring(){}; + + // TODO add more monitoring + std::atomic thread_crashes; // restarts of tool worker thread (main thread found reader thread 'running' was false) + + std::string toJSON(){ + + std::string s="{\"thread_crashes\":"+std::to_string(thread_crashes.load()) + +"}"; + + return s; + } +}; + +#endif diff --git a/UserTools/Monitoring/README.md b/UserTools/Monitoring/README.md new file mode 100644 index 0000000..ff96ec0 --- /dev/null +++ b/UserTools/Monitoring/README.md @@ -0,0 +1,19 @@ +# Monitoring + +Monitoring + +## Data + +Describe any data formats Monitoring creates, destroys, changes, analyzes, or its usage. + + + + +## Configuration + +Describe any configuration variables for Monitoring.
+ +``` +param1 value1 +param2 value2 +``` diff --git a/UserTools/MulticastReceiverSender/MulticastReceiveMonitoring.h b/UserTools/MulticastReceiverSender/MulticastReceiveMonitoring.h new file mode 100644 index 0000000..7b1e0bc --- /dev/null +++ b/UserTools/MulticastReceiverSender/MulticastReceiveMonitoring.h @@ -0,0 +1,36 @@ +#ifndef MulticastReceiveMonitoring_H +#define MulticastReceiveMonitoring_H + +#include "MonitoringVariables.h" + +class MulticastReceiveMonitoring : public MonitoringVariables { + public: + MulticastReceiveMonitoring(){}; + ~MulticastReceiveMonitoring(){}; + + std::atomic polls_failed; // error polling socket + std::atomic rcv_fails; // error in recv_from + std::atomic send_fails; // error in send_to + std::atomic msgs_rcvd; // messages successfully received + std::atomic msgs_sent; // messages successfully sent + std::atomic in_buffer_transfers; // transfers of thread-local message vector to datamodel + std::atomic out_buffer_transfers; // transfers of thread-local message vector to datamodel + std::atomic thread_crashes; // restarts of tool worker thread (main thread found reader thread 'running' was false) + + std::string toJSON(){ + + std::string s="{\"polls_failed\":"+std::to_string(polls_failed.load()) + +",\"rcv_fails\":"+std::to_string(rcv_fails.load()) + +",\"send_fails\":"+std::to_string(send_fails.load()) + +",\"msgs_rcvd\":"+std::to_string(msgs_rcvd.load()) + +",\"msgs_sent\":"+std::to_string(msgs_sent.load()) + +",\"in_buffer_transfers\":"+std::to_string(in_buffer_transfers.load()) + +",\"out_buffer_transfers\":"+std::to_string(out_buffer_transfers.load()) + +",\"thread_crashes\":"+std::to_string(thread_crashes.load()) + +"}"; + + return s; + } +}; + +#endif diff --git a/UserTools/MulticastReceiverSender/MulticastReceiverSender.cpp b/UserTools/MulticastReceiverSender/MulticastReceiverSender.cpp new file mode 100644 index 0000000..5f1add3 --- /dev/null +++ b/UserTools/MulticastReceiverSender/MulticastReceiverSender.cpp @@ -0,0
+1,369 @@ +#include "MulticastReceiverSender.h" + +#include + +namespace { + const uint32_t MAX_UDP_PACKET_SIZE = 65535; +} + +MulticastReceiverSender::MulticastReceiverSender():Tool(){} + + +bool MulticastReceiverSender::Initialise(std::string configfile, DataModel &data){ + + InitialiseTool(data); + m_configfile = configfile; + InitialiseConfiguration(configfile); + //m_variables.Print(); + + /* ----------------------------------------- */ + /* Configuration */ + /* ----------------------------------------- */ + + m_verbose=1; + int port = 5000; // shared with service discovery, logging and monitoring + std::string multicast_address; // separate for each + // FIXME slow controls to vary them + int local_buffer_size = 100; + int transfer_period_ms = 1000; + int poll_timeout_ms = 100; + + m_variables.Get("type",type_str); + if(type_str!="logging" && type_str!="monitoring"){ + Log("invalid port type '"+type_str+"'; valid values are 'logging' and 'monitoring'",v_error); + return false; + } + m_variables.Get("verbose",m_verbose); + m_variables.Get("port",port); + if(!m_variables.Get("multicast_address",multicast_address)){ + if(type_str=="logging") multicast_address = "239.192.1.2"; + else multicast_address = "239.192.1.3"; + } + printf("%s binding to %s:%d\n",m_tool_name.c_str(),multicast_address.c_str(),port); + + // buffer received messages in a local vector until size exceeds local_buffer_size... + m_variables.Get("local_buffer_size",local_buffer_size); + // ...
or time since last transfer exceeds transfer_period_ms + m_variables.Get("transfer_period_ms",transfer_period_ms); + m_variables.Get("poll_timeout_ms",poll_timeout_ms); + + ExportConfiguration(); + + /* ----------------------------------------- */ + /* Socket Setup */ + /* ----------------------------------------- */ + + socket_handle = socket(AF_INET, SOCK_DGRAM, 0); + if(socket_handle<=0){ + Log(std::string{"Failed to open multicast socket with error "}+strerror(errno),v_error); + return false; + } + + // set linger options - do not linger, discard queued messages on socket close + struct linger l; + l.l_onoff = 0; // whether to linger + l.l_linger = 0; // seconds to linger for + get_ok = setsockopt(socket_handle, SOL_SOCKET, SO_LINGER, (char*) &l, sizeof(l)); + if(get_ok!=0){ + Log(std::string{"Failed to set multicast socket linger with error "}+strerror(errno),v_error); + return false; + } + + // disable blocking connections to this ip+port from TIME_WAIT after closure. + // this is intended to prevent delivery of delayed packets to the wrong application, + // but means a new middleman instance won't be able to bind for 30-120 seconds after another closes.
+ int a =1; + get_ok = setsockopt(socket_handle, SOL_SOCKET, SO_REUSEADDR, &a, sizeof(a)); + if(get_ok!=0){ + Log(std::string{"Failed to set multicast socket reuseaddr with error "}+strerror(errno),v_error); + return false; + } + + // set the socket to non-blocking mode - should be irrelevant as we poll + get_ok = fcntl(socket_handle, F_SETFL, O_NONBLOCK); + if(get_ok!=0){ + Log(std::string{"Failed to set multicast socket to non-blocking with error "}+strerror(errno),v_warning); + } + + + // format destination address from IP string + struct sockaddr_in addr; + socklen_t addrlen = sizeof(addr); + bzero((char *)&addr, addrlen); // init to 0 + addr.sin_family = AF_INET; + addr.sin_port = htons(port); + + // to receive traffic from a specific group, either bind to that group *and* join the group +// inet_aton(multicast_address.c_str(), &addr.sin_addr); + // or bind to INADDR_ANY, disable IP_MULTICAST_ALL, and then join the group + addr.sin_addr.s_addr = htonl(INADDR_ANY); + a=0; + setsockopt(socket_handle, IPPROTO_IP, IP_MULTICAST_ALL, &a, sizeof(int)); + + /* FIXME FIXME FIXME + // sending: which multicast group to send to + get_ok = inet_aton(multicast_address.c_str(), &addr.sin_addr); + if(get_ok==0){ // returns 0 if invalid, unlike other functions + Log("Bad multicast address '"+multicast_address+"'",v_error); + return false; + } + + // for two-way comms, we should bind to INADDR_ANY, not a specific multicast address.... maybe? 
+ struct sockaddr_in multicast_addr2; + bzero((char *)&multicast_addr2, sizeof(multicast_addr2)); // init to 0 + multicast_addr2.sin_family = AF_INET; + multicast_addr2.sin_port = htons(log_port); + multicast_addr2.sin_addr.s_addr = htonl(INADDR_ANY); << like this + + // disable receiving multicast messages we send + a=0; + setsockopt(sock.at(i), SOL_SOCKET, IP_MULTICAST_LOOP, &a, sizeof(a)); + */ + + // to listen we need to bind to the socket + get_ok = (bind(socket_handle, (struct sockaddr*)&addr, addrlen) == 0); + if(!get_ok) { + Log("Failed to bind to multicast listen socket",v_error); + return false; + } + + // and join a multicast group + struct ip_mreq mreq; + mreq.imr_interface.s_addr = htonl(INADDR_ANY); + get_ok = inet_aton(multicast_address.c_str(), &mreq.imr_multiaddr); + if(get_ok==0){ + Log("Bad multicast group '"+multicast_address+"'",v_error); + return false; + } + get_ok = setsockopt(socket_handle, IPPROTO_IP, IP_ADD_MEMBERSHIP, &mreq, sizeof(mreq)); + if(get_ok!=0){ + Log("Failed to join multicast group",v_error); + return false; + } + + /* ----------------------------------------- */ + /* Thread Setup */ + /* ----------------------------------------- */ + + // monitoring struct to encapsulate tracking info + std::unique_lock locker(m_data->monitoring_variables_mtx); + m_data->monitoring_variables.emplace(m_tool_name, &monitoring_vars); + + thread_args.m_data = m_data; + thread_args.m_tool_name = m_tool_name; + thread_args.monitoring_vars = &monitoring_vars; + thread_args.socket = socket_handle; + thread_args.addr = addr; + thread_args.addrlen = addrlen; + thread_args.poll = zmq::pollitem_t{NULL, socket_handle, ZMQ_POLLIN, 0}; + thread_args.poll_timeout_ms = poll_timeout_ms; + thread_args.local_buffer_size = local_buffer_size; + thread_args.in_local_queue = m_data->multicast_buffer_pool.GetNew(local_buffer_size); + thread_args.in_local_queue->resize(0); + thread_args.last_transfer = std::chrono::steady_clock::now(); + 
thread_args.transfer_period_ms = std::chrono::milliseconds{transfer_period_ms}; + thread_args.in_queue = &m_data->in_multicast_msg_queue; + thread_args.in_queue_mtx = &m_data->in_multicast_msg_queue_mtx; + if(type_str=="logging"){ + thread_args.out_queue = &m_data->out_log_msg_queue; + thread_args.out_queue_mtx = &m_data->out_log_msg_queue_mtx; + } else { + thread_args.out_queue = &m_data->out_mon_msg_queue; + thread_args.out_queue_mtx = &m_data->out_mon_msg_queue_mtx; + } + + // thread needs a unique name + printf("spawning %s send/receiver thread\n",type_str.c_str()); + if(!m_data->utils.CreateThread(type_str+"_sendreceiver", &Thread, &thread_args)){ + Log("Failed to spawn background thread",v_error,m_verbose); + return false; + } + m_data->num_threads++; + + return true; +} + + +bool MulticastReceiverSender::Execute(){ + + if(!thread_args.running){ + Log("Execute found thread not running!",v_error); + Finalise(); + Initialise(m_configfile, *m_data); // FIXME should we give up if Initialise returns false? should we set StopLoop to 1? + // FIXME if restarts > X times in last Y mins, alarm (bypass, shove into DB? send to websocket?) and StopLoop. + ++(monitoring_vars.thread_crashes); + } + + // Hmmm, throttling of the main thread is specified in the Sleep tool, but that's fairly short + // that means these variables are being updated thousands of times a second. + // Pro: monitoring info is up-to-date when it goes out + // Con: wasteful... + // FIX: update 1/10th monitoring interval? synchronise with Monitoring Tool? + monitoring_vars.Set("buffered_in_messages",thread_args.in_local_queue->size()); + monitoring_vars.Set("waiting_out_messages",thread_args.out_local_queue.size()); + + /* + actually we can't do this. steady_clock is what we want for regular tasks, + but cannot be converted to a meaningful time unless we manually keep some + reference time for conversion. 
Even then, it may drift as, by definition, + it does not necessarily stay in sync with system_clock. Not sure what to do about that! + // aren't you glad we have std::chrono to avoid all that c-style time jank? + time_t lt = thread_args.last_transfer.time_since_epoch().count(); + struct tm lt_s; + localtime_r(<, <_s); + char tbuf[50]; + strftime(tbuf, 50, "%F %T%z",<_s); + monitoring_vars.Set("last_transfer",tbuf); + */ + + return true; +} + + +bool MulticastReceiverSender::Finalise(){ + + // signal background receiver thread to stop + //Log("Joining receiver thread",v_warning); + m_data->utils.KillThread(&thread_args); + m_data->num_threads--; + + std::unique_lock locker(m_data->in_multicast_msg_queue_mtx); + m_data->in_multicast_msg_queue.clear(); + locker.unlock(); + + if(type_str=="logging"){ + locker = std::unique_lock(m_data->out_log_msg_queue_mtx); + m_data->out_log_msg_queue.clear(); + } else { + locker = std::unique_lock(m_data->out_mon_msg_queue_mtx); + m_data->out_mon_msg_queue.clear(); + } + + if(socket_handle>0){ + get_ok = close(socket_handle); + if(get_ok!=0){ + Log(std::string{"Error closing socket "}+strerror(errno),v_error); + return false; + } + } + + locker = std::unique_lock(m_data->monitoring_variables_mtx); + m_data->monitoring_variables.erase(m_tool_name); + + Log("Finished",v_warning); + return true; +} + +void MulticastReceiverSender::Thread(Thread_args* arg){ + + MulticastReceive_args* m_args=reinterpret_cast(arg); + DataModel* m_data = m_args->m_data; + + // transfer to datamodel + // ===================== + if(!m_args->in_local_queue->empty() && + ((m_args->in_local_queue->size()>m_args->local_buffer_size) || + (std::chrono::steady_clock::now() - m_args->last_transfer) > m_args->transfer_period_ms) ){ + + //printf("adding %d %s messages to datamodel\n",m_args->in_local_queue->size(), m_args->m_tool_name.c_str()); + + std::unique_lock locker(*m_args->in_queue_mtx); + m_args->in_queue->push_back(m_args->in_local_queue); + locker.unlock(); 
+ + m_args->in_local_queue = m_data->multicast_buffer_pool.GetNew(m_args->local_buffer_size); + m_args->in_local_queue->resize(0); + + m_args->last_transfer = std::chrono::steady_clock::now(); + ++(m_args->monitoring_vars->in_buffer_transfers); + } + + + // poll + // ==== + try { + m_args->get_ok = zmq::poll(&m_args->poll, 1, m_args->poll_timeout_ms); + } catch(zmq::error_t& err){ + // ignore poll aborting due to signals + if(zmq_errno()==EINTR) return; + std::cerr<m_tool_name<<" poll caught "<monitoring_vars->polls_failed); + m_args->running=false; // FIXME Handle other errors? or just globally via restarting thread? + return; + } + catch(...){ + std::cerr<m_tool_name<<" poll caught "<monitoring_vars->polls_failed); + m_args->running=false; // FIXME Handle other errors? or just globally via restarting thread? or throw? + return; + } + if(m_args->get_ok<0){ + std::cerr<m_tool_name<<" poll failed with "<monitoring_vars->polls_failed); + m_args->running=false; // FIXME Handle other errors? or just globally via restarting thread? + return; + } + + // read + // ==== + if(m_args->poll.revents & ZMQ_POLLIN){ + //printf("%s receiving message\n",m_args->m_tool_name.c_str()); + + // read the messge + m_args->get_ok = recvfrom(m_args->socket, m_args->message, MAX_UDP_PACKET_SIZE, 0, (struct sockaddr*)&m_args->addr, &m_args->addrlen); + if(m_args->get_ok <= 0){ + ++(m_args->monitoring_vars->rcv_fails); + // FIXME better logging + std::cerr<m_tool_name<<": Failed to receive message from " + <addr.sin_addr) // FIXME is this valid on failure? 
+ <<" with error "<monitoring_vars->msgs_rcvd); + //m_data->Log("Received multicast message '"+std::string(m_args->message) + // +"' from "+std::string{inet_ntoa(&m_args->addr->sin_addr)},12); + + m_args->in_local_queue->emplace_back(m_args->message); + + } + } + + // write + // ===== + if(m_args->out_i < m_args->out_local_queue.size()){ + + //printf("%s sending message\n",m_args->m_tool_name.c_str()); + + // Get the message + std::string& message = m_args->out_local_queue[m_args->out_i++]; // always increment, even if error + + // send it + int cnt = sendto(m_args->socket, message.c_str(), message.length()+1, 0, (struct sockaddr*)&m_args->addr, m_args->addrlen); + + // check success + if(cnt < 0){ + //m_data->Log("Error sending multicast message: "+strerror(errno),v_error); // FIXME ensure this isn't circular + ++(m_args->monitoring_vars->send_fails); + } else { + ++(m_args->monitoring_vars->msgs_sent); + } + + } else { + + + // else see if there are any in datamodel to grab + std::unique_lock locker(*m_args->out_queue_mtx); + if(!m_args->out_queue->empty()){ + m_args->out_local_queue.clear(); + //printf("%s fetching new outgoing messages\n",m_args->m_tool_name.c_str()); + std::swap(*m_args->out_queue, m_args->out_local_queue); + ++(m_args->monitoring_vars->out_buffer_transfers); + m_args->out_i=0; + } + locker.unlock(); + + } + + return; +} diff --git a/UserTools/MulticastReceiverSender/MulticastReceiverSender.h b/UserTools/MulticastReceiverSender/MulticastReceiverSender.h new file mode 100644 index 0000000..0590dac --- /dev/null +++ b/UserTools/MulticastReceiverSender/MulticastReceiverSender.h @@ -0,0 +1,76 @@ +#ifndef MulticastReceiverSender_H +#define MulticastReceiverSender_H + +#include +#include +#include +// multicast +#include +#include +#include +#include +#include + +#include "Tool.h" +#include "DataModel.h" +#include "MulticastReceiveMonitoring.h" + +/** + * \class MulticastReceiverSender + * + * This Tool receives and sends logging or monitoring 
(multicast) messages via a thread, pushing them to/pulling them from the DataModel. + * + * $Author: M. O'Flaherty $ + * $Date: 2025/11/26 $ + * Contact: marcus.o-flaherty@warwick.ac.uk +*/ + +// class for things passed to multicast listener thread +struct MulticastReceive_args : public Thread_args { + + std::string m_tool_name; + DataModel* m_data; + MulticastReceiveMonitoring* monitoring_vars; + socklen_t addrlen; + struct sockaddr_in addr; + int socket; + int poll_timeout_ms; + zmq::pollitem_t poll; + char message[655355]; // theoretical maximum UDP buffer size - size also hard-coded in thread + int get_ok; + size_t local_buffer_size; + std::vector* in_local_queue; + std::vector out_local_queue; + size_t out_i=0; + + std::vector*>* in_queue; + std::mutex* in_queue_mtx; + std::vector* out_queue; + std::mutex* out_queue_mtx; + + std::chrono::time_point last_transfer; + std::chrono::milliseconds transfer_period_ms; + +}; + +class MulticastReceiverSender: public Tool { + + public: + MulticastReceiverSender(); ///< Simple constructor + bool Initialise(std::string configfile,DataModel &data); ///< Initialise Function for setting up Tool resorces. @param configfile The path and name of the dynamic configuration file to read in. @param data A reference to the transient data class used to pass information between Tools. + bool Execute(); ///< Executre function used to perform Tool perpose. + bool Finalise(); ///< Finalise funciton used to clean up resorces. 
+ + private: + static void Thread(Thread_args* args); + MulticastReceive_args thread_args; + MulticastReceiveMonitoring monitoring_vars; + + std::string type_str; // "logging" or "monitoring" + int socket_handle; + int get_ok; + std::atomic* thread_crashes; + +}; + +#endif diff --git a/UserTools/MulticastReceiverSender/README.md b/UserTools/MulticastReceiverSender/README.md new file mode 100644 index 0000000..e410e74 --- /dev/null +++ b/UserTools/MulticastReceiverSender/README.md @@ -0,0 +1,19 @@ +# MulticastReceiver + +MulticastReceiver + +## Data + +Describe any data formats MulticastReceiver creates, destroys, changes, analyzes, or its usage. + + + + +## Configuration + +Describe any configuration variables for MulticastReceiver. + +``` +param1 value1 +param2 value2 +``` diff --git a/UserTools/MulticastWorkers/MulticastWorkerMonitoring.h b/UserTools/MulticastWorkers/MulticastWorkerMonitoring.h new file mode 100644 index 0000000..e0ff75a --- /dev/null +++ b/UserTools/MulticastWorkers/MulticastWorkerMonitoring.h @@ -0,0 +1,28 @@ +#ifndef MulticastWorkerMonitoring_H +#define MulticastWorkerMonitoring_H + +#include "MonitoringVariables.h" + +class MulticastWorkerMonitoring : public MonitoringVariables { + public: + MulticastWorkerMonitoring(){}; + ~MulticastWorkerMonitoring(){}; + + std::atomic jobs_failed; + std::atomic jobs_completed; + std::atomic msgs_processed; // each job concatenates a batch of messages; this sums all batches + std::atomic thread_crashes; // restarts of tool worker thread (main thread found reader thread 'running' was false) + + std::string toJSON(){ + + std::string s="{\"jobs_failed\":"+std::to_string(jobs_failed.load()) + +",\"jobs_completed\":"+std::to_string(jobs_completed.load()) + +",\"msgs_processed\":"+std::to_string(msgs_processed.load()) + +",\"thread_crashes\":"+std::to_string(thread_crashes.load()) + +"}"; + + return s; + } +}; + +#endif diff --git a/UserTools/MulticastWorkers/MulticastWorkers.cpp 
b/UserTools/MulticastWorkers/MulticastWorkers.cpp new file mode 100644 index 0000000..f0cd1db --- /dev/null +++ b/UserTools/MulticastWorkers/MulticastWorkers.cpp @@ -0,0 +1,426 @@ +#include "MulticastWorkers.h" + +MulticastWorkers::MulticastWorkers():Tool(){} + + +bool MulticastWorkers::Initialise(std::string configfile, DataModel &data){ + + InitialiseTool(data); + m_configfile = configfile; + InitialiseConfiguration(configfile); + //m_variables.Print(); + +// // allocate ehhh 60% of the CPU to multicast workers +// int max_workers= (double(std::thread::hardware_concurrency())*0.6); + + if(!m_variables.Get("verbose",m_verbose)) m_verbose=1; +// m_variables.Get("max_workers",max_workers); + + ExportConfiguration(); + + // potentially we will have a dedicated worker pool for multicast, but for now, + // just one created and managed by JobManager Tool + //job_manager = new WorkerPoolManager(multicast_jobs, &max_workers, 0, 0, 0, true, true); + + // monitoring struct to encapsulate tracking info + std::unique_lock locker(m_data->monitoring_variables_mtx); + m_data->monitoring_variables.emplace(m_tool_name, &monitoring_vars); + + thread_args.m_data = m_data; + thread_args.monitoring_vars = &monitoring_vars; + // thread needs a unique name + if(!m_data->utils.CreateThread("multicast_job_distributor", &Thread, &thread_args)){ + Log("Failed to spawn background thread",v_error,m_verbose); + return false; + } + m_data->num_threads++; + + return true; +} + +bool MulticastWorkers::Execute(){ + + // FIXME ok but actually this kills all our jobs, not just our job distributor + // so we don't want to do that. + if(!thread_args.running){ + Log("Execute found thread not running!",v_error); + Finalise(); + Initialise(m_configfile, *m_data); // FIXME should we give up if Initialise returns false? should we set StopLoop to 1? 
+ ++(monitoring_vars.thread_crashes); + } + + return true; +} + +bool MulticastWorkers::Finalise(){ + + // signal job distributor thread to stop + Log("Joining receiver thread",v_warning); + m_data->utils.KillThread(&thread_args); + m_data->num_threads--; + + // this will invoke kill on the WorkerPoolManager thread creating worker threads, as well as all workers. + //delete job_manager; + + std::unique_lock locker(m_data->monitoring_variables_mtx); + m_data->monitoring_variables.erase(m_tool_name); + + Log("Finished",v_warning); + return true; +} + + +void MulticastWorkers::Thread(Thread_args* args){ + + MulticastJobDistributor_args* m_args = dynamic_cast(args); + m_args->local_msg_queue.clear(); + + // grab any batches of logging/monitoring messages + std::unique_lock locker(m_args->m_data->in_multicast_msg_queue_mtx); + if(!m_args->m_data->in_multicast_msg_queue.empty()){ + std::swap(m_args->m_data->in_multicast_msg_queue, m_args->local_msg_queue); + } else { + locker.unlock(); + usleep(100); + return; + } + locker.unlock(); + + // add a job for each batch to the queue + for(int i=0; ilocal_msg_queue.size(); ++i){ + + // add a new Job to the job queue to process this data + Job* the_job = m_args->m_data->job_pool.GetNew("multicast_worker"); + the_job->out_pool = &m_args->m_data->job_pool; + if(the_job->data == nullptr){ + // on first creation of the job, make it a JobStruct to encapsulate its data + // N.B. 
Pool::GetNew will only invoke the constructor if this is a new instance, + // (not if it's been used before and then returned to the pool) + // so don't pass job-specific variables to the constructor + the_job->data = m_args->job_struct_pool.GetNew(&m_args->job_struct_pool, m_args->m_data, m_args->monitoring_vars); + } else { + // this should never happen as jobs should return their args to the pool + std::cerr<<"Multicast Job with non-null data pointer!"<(the_job->data); + job_data->monitoring_vars = m_args->monitoring_vars; + job_data->m_job_name = "multicast_worker"; + job_data->msg_buffer = m_args->local_msg_queue[i]; + job_data->logging_buffer = m_args->m_data->multicast_batch_pool.GetNew(); + job_data->monitoring_buffer = m_args->m_data->multicast_batch_pool.GetNew(); + job_data->rootplot_buffer = m_args->m_data->multicast_batch_pool.GetNew(); + job_data->plotlyplot_buffer = m_args->m_data->multicast_batch_pool.GetNew(); + + the_job->func = MulticastMessageJob; + the_job->fail_func = MulticastMessageFail; + + //multicast_jobs.AddJob(the_job); + //printf("spawning new multicastjob for %d messages\n",job_data->msg_buffer->size()); + m_args->m_data->job_queue.AddJob(the_job); + + } + + return; +} + + +// ««-------------- ≪ °◇◆◇° ≫ --------------»» + +void MulticastWorkers::MulticastMessageFail(void*& arg){ + + // safety check in case the job somehow fails after returning its args to the pool + if(arg==nullptr){ + std::cerr<<"multicast worker fail with no args"<(arg); + std::cerr<m_job_name<<" failure"<monitoring_vars->jobs_failed); + + // return the vector of string buffers to the pool for re-use by MulticastReceiverSender Tool + m_args->msg_buffer->clear(); + m_args->m_data->multicast_buffer_pool.Add(m_args->msg_buffer); + + // return our job args to the pool + m_args->m_pool->Add(m_args); + m_args = nullptr; // clear the local m_args variable... 
not strictly necessary + arg = nullptr; // clear the job 'data' member variable + + // FIXME do something here + // we could also try to insert the buffers into the queues for downstream, + // if there were preceding messages that were succesfully added. + // but we don't know where we failed, so that could be risky. + // we could keep track of where we were in m_args and: + // 1. log the specific message we were trying to process when the job failed + // 2. submit the data we already have + // 3. make a new job for the remaining data + + return; +} + +// ««-------------- ≪ °◇◆◇° ≫ --------------»» + +// Each job takes a vector of messages and converts them into a suitable object, +// then locks and inserts that into a datamodel vector for the database workers +bool MulticastWorkers::MulticastMessageJob(void*& arg){ + + MulticastJobStruct* m_args=static_cast(arg); + + // most efficient way to do insertion would seem to be via jsonb_to_recordset, which allows batching queries, + // query optimisation similar to 'unnest', and avoids the overhead of parsing the JSON: e.g. 
+ // psql -c "INSERT INTO logging ( time, device, severity, message ) SELECT * FROM + // jsonb_to_recordset('[ {\"time\":\"2025-12-01 12:31\", \"device\":\"dev1\", \"severity\":1, \"message\":\"blah\"}, + // {\"time\":\"2025-12-02 15:25\", \"device\":\"dev2\", \"severity\":2, \"message\":\"arg\"} ]') + // as t(time timestamptz, device text, severity int, message text);" << (this part is needed) + + // or: + // PREPARE loginsert ( text ) AS INSERT INTO logging ( time, device, severity, message ) SELECT * FROM jsonb_to_recordset( $1::jsonb ) as t(time timestamptz, device text, severity int, message text); + // then: + // execute loginsert('[ {"time":"2025-12-01 12:31", "device":"dev1", "severity":1, "message":"blah"}, {"time":"2025-12-02 15:25", "device":"dev2", "severity":2, "message":"oooh"} ]'); + + // subsequently, all we need to do here is concatenate the JSONs + + //printf("%s processing %d batches\n",m_args->m_job_name.c_str(), m_args->msg_buffer->size()); + + *m_args->logging_buffer = "["; + *m_args->monitoring_buffer = "["; + *m_args->rootplot_buffer = "["; + *m_args->plotlyplot_buffer = "["; + + // loop over messages + for(std::string& next_msg : *m_args->msg_buffer){ + + // we can't batch insertions destined for different tables, + // so keep each message type (topic) in a different buffer. 
+ // the Services class always puts the topic first, + // and all topics start with a unique character (XXX for now?), + // so we don't need to parse the message to identify the topic: +// printf("validating first 9 chars are topic: '%s', %d\n",next_msg.substr(0,9).c_str(),strcmp(next_msg.substr(0,9).c_str(),"{\"topic\":")); + if(next_msg.substr(0,9)!="{\"topic\":"){ + // FIXME log it as bad multicast + printf("%s ignoring bad multicast message '%s'\n",m_args->m_job_name.c_str(), next_msg.c_str()); + continue; + } + + switch(query_topic{next_msg[10]}){ + case query_topic::logging: + m_args->out_buffer = m_args->logging_buffer; + break; + case query_topic::monitoring: + m_args->out_buffer = m_args->monitoring_buffer; + break; + case query_topic::rootplot: + m_args->out_buffer = m_args->rootplot_buffer; + break; + case query_topic::plotlyplot: + m_args->out_buffer = m_args->plotlyplot_buffer; + break; + default: + printf("%s unknown multicast topic '%c' in message '%s'\n",m_args->m_job_name.c_str(), next_msg[10],next_msg.c_str()); + continue; // FIXME unknown topic: error log it. 
+ } + + if(m_args->out_buffer->length()>1) (*m_args->out_buffer) += ", "; + (*m_args->out_buffer) += next_msg; + //printf("%s added message '%s'\n",m_args->m_job_name.c_str(), next_msg.c_str()); + + ++(m_args->monitoring_vars->msgs_processed); + + } + + // pass into datamodel for DatabaseWorkers + if(m_args->logging_buffer->length()!=1){ + *m_args->logging_buffer += "]"; + std::unique_lock locker(m_args->m_data->log_query_queue_mtx); + m_args->m_data->log_query_queue.push_back(m_args->logging_buffer); + //printf("%s adding '%s' to logging buffer\n",m_args->m_job_name.c_str(), m_args->logging_buffer->c_str()); + } + + if(m_args->monitoring_buffer->length()!=1){ + *m_args->monitoring_buffer += "]"; + std::unique_lock locker(m_args->m_data->mon_query_queue_mtx); + m_args->m_data->mon_query_queue.push_back(m_args->monitoring_buffer); + } + + if(m_args->rootplot_buffer->length()!=1){ + *m_args->rootplot_buffer += "]"; + std::unique_lock locker(m_args->m_data->rootplot_query_queue_mtx); + m_args->m_data->rootplot_query_queue.push_back(m_args->rootplot_buffer); + } + + if(m_args->plotlyplot_buffer->length()!=1){ + *m_args->plotlyplot_buffer += "]"; + std::unique_lock locker(m_args->m_data->plotlyplot_query_queue_mtx); + m_args->m_data->plotlyplot_query_queue.push_back(m_args->plotlyplot_buffer); + } + + // return the vector of string buffers to the pool for re-use by MulticastReceiverSender Tool + m_args->msg_buffer->clear(); + m_args->m_data->multicast_buffer_pool.Add(m_args->msg_buffer); + + //printf("%s job completed\n",m_args->m_job_name.c_str()); + ++(m_args->monitoring_vars->jobs_completed); + + m_args->m_pool->Add(m_args); // return our job args to the job args struct pool + m_args = nullptr; // clear the local m_args variable... 
not strictly necessary + arg = nullptr; // clear the job 'data' member variable + + return true; + +} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +/* +// // used with v0 batching +// static const std::string log_base = "INSERT INTO logging ( time, device, severity, message ) VALUES "; +// static const std::string mon_base = "INSERT INTO monitoring ( time, device, subject, data ) VALUES "; + +// Each job takes a vector of messages and converts them into a suitable object, +// then locks and inserts that into a datamodel vector for the database workers +void MulticastWorkers::MulticastMessageJob(void* arg){ + + //================= + // v0: combine all multicasts into a batch sql query (curent version) + // note! supposedly cannot use this with pqxx::pipeline (see 'pqxx::pipeline::insert') + // although if we're not interested in any return values, maybe it's ok...? + + // v1: insert into pipeline - does batching for you, so maybe equivalent to v0? + // accepts a std::stringview of a query, so still need to do sanitization yourself, + // and be mindful of lifetime of the query you pass it! + + // v2: turn each multicast into a form suitable for use with pqxx::stream + // the fastest method is pqxx::stream::write_values(T...) 
which accepts a set of variables + // less preferred is write_row or operator<< both of which accept a container or tuple + + // v3: turn each multicast message into a pqxx::params object to be used with a prepared statement (pqxx::prepped) + + // v4: transpose the data and use unnest to pass multiple rows as a set of columns + // this should be close in performance to COPY (stream) + // psql -c "INSERT INTO logging ( time, device, severity, message ) SELECT * FROM + // UNNEST(ARRAY['2025-12-01 12:31', '2025-12-02 15:23']::timestamptz[], + // ARRAY['dev1', 'dev2'], + // '{1,2}'::int[], << alternative way to define an array of ints (note bracket change) + // '{\"blah\", \"argh\"}'::text[])" << for array of strings need internal quoting + + // v5: just insert the JSON directly 5-head + // psql -c "INSERT INTO logging ( time, device, severity, message ) SELECT * FROM + // jsonb_to_recordset('[ {\"time\":\"2025-12-01 12:31\", \"device\":\"dev1\", \"severity\":1, \"message\":\"blah\"}, + // {\"time\":\"2025-12-02 15:25\", \"device\":\"dev2\", \"severity\":2, \"message\":\"arg\"} ]') + // as t(time timestamptz, device text, severity int, message text);" << this part is needed + + PREPARE moninsert ( text ) as INSERT INTO monitoring ( time, device, subject, data ) select * from jsonb_to_recordset( $1::jsonb ) as t(time timestamptz, device text, subject text, data jsonb ); + execute moninsert('[ {"time":"2025-12-03 12:22", "device":"dev3", "subject":"test", "data":{"testkey":"testval", "key2":3} }, {"time":"2025-12-03 13:23", "device":"dev3", "subject":"test", "data":{"testkey":"testval2", "key2":4} } ]' ); + + //================== + + MulticastJobStruct* m_args=static_cast(arg); + + // v0: pre-populate query with base + m_args->out_buffer = m_args->query_base; + + // loop over messages, parse each into an SQL query + for(std::string& next_msg : *m_args->msg_buffer){ + +// // parse message +// thread_local MulticastMsg msg; +// msg.Clear(); +// if(!msg.Parse(message)){ +// 
Log("MulticastMessageToQuery error parsing message json '"+message+"'",v_error); // FIXME track, report +// continue; +// } + + // we can't batch insert of records into different tables + // so keep each message type (topic) in a different buffer +// if(msg.topic=="logging"){ +// m_args->out_buffer = m_args->logging_buffer; +// } else if(topic=="monitoring"){ +// m_args->out_buffer = m_args->monitoring_buffer; +// } else if(topic=="rootplot"){ +// m_args->out_buffer = m_args->rootplot_buffer; +// } else if(msg.topic=="plotlyplot"){ +// m_args->out_buffer = m_args->plotlyplot_buffer; +// } + +// // v0: concatenate to batch query +// m_args->out_buffer += msg.GetString(m_args->first_vals); // FIXME: sanitization + +// // v1: insert into pipeline +// m_args->out_buffer.insert(msg.GetString()); // FIXME lifetime of this string needs to persist.... how to do? + +// // v2: append to queue of tuples for stream // n.b. need to split into GetMonitoringTuple/GetLoggingTuple +// m_args->out_buffer->push_back(msg.GetTuple()); // because the tuple types are different +// or +// m_args->out_buffer->push_back(msg); // what's the deal here? +// // well the preferred way to use a stream is pqxx::stream_to(a, b c) for variables a,b,c +// // we implement this via Msg::StreamRow(pqxx::stream_to), but this Tool doesn't have the pqxx::stream_to +// +// // v3: append to queue of pqxx::params objects for prepared statement +// m_args->out_buffer->push_back(msg.GetParams()); + +// // v4: unnest +// m_args->out_buffer.Append(msg); +// // it's going to be some kind of struct that internally has strings for a list of timestamps, +// // device names, severities and messages. Append adds this messages' new values to each. + + ++m_args->n_queries; // FIXME make atomic, stats tracking + + } + +// // v0: terminate this batch with semicolon +// m_args->out_buffer += ";"; + + ... 
+ +} +*/ diff --git a/UserTools/MulticastWorkers/MulticastWorkers.h b/UserTools/MulticastWorkers/MulticastWorkers.h new file mode 100644 index 0000000..4207f43 --- /dev/null +++ b/UserTools/MulticastWorkers/MulticastWorkers.h @@ -0,0 +1,70 @@ +#ifndef MulticastWorkers_H +#define MulticastWorkers_H + +#include + +#include "Tool.h" +#include "DataModel.h" +#include "MulticastWorkerMonitoring.h" + +/** +* \class MulticastWorkers +* +* This Tool uses a worker pool to process batches of multicast messages (received in JSON format), separates them based on their topic (i.e. destination table) and prepares them for insertion into the database by database workers. This preparation may include batching messages, decoding the JSON into SQL, extraction of JSON variables into parameter packs, etc. Presently, it batches the JSON for use with postgres jsonb_to_recordset. +* +* $Author: M. O'Flaherty $ +* $Date: 2025/12/04 $ +* Contact: marcus.o-flaherty@warwick.ac.uk +*/ + +// class for things passed to multicast worker threads +struct MulticastJobStruct { + + MulticastJobStruct(Pool* pool, DataModel* data, MulticastWorkerMonitoring* mon) : m_pool(pool), m_data(data), monitoring_vars(mon){}; + DataModel* m_data; + MulticastWorkerMonitoring* monitoring_vars; + Pool* m_pool; + std::string m_job_name; + std::vector* msg_buffer; + std::string* logging_buffer; + std::string* monitoring_buffer; + std::string* rootplot_buffer; + std::string* plotlyplot_buffer; + std::string* out_buffer; + +}; + +struct MulticastJobDistributor_args : Thread_args { + + DataModel* m_data; + MulticastWorkerMonitoring* monitoring_vars; + std::vector*> local_msg_queue; // swap with datamodel and then pass out to jobs + Pool job_struct_pool{true, 1000, 100}; ///< pool for job objects used by worker threads + +}; + +class MulticastWorkers: public Tool { + + public: + MulticastWorkers(); ///< Simple constructor + bool Initialise(std::string configfile,DataModel &data); ///< Initialise Function for setting up 
Tool resorces. @param configfile The path and name of the dynamic configuration file to read in. @param data A reference to the transient data class used to pass information between Tools. + bool Execute(); ///< Executre function used to perform Tool perpose. + bool Finalise(); ///< Finalise funciton used to clean up resorces. + + private: + static void Thread(Thread_args* args); ///< job distributor thread function that pulls batches of multicast messages from upstream and passes them to the job queue + MulticastJobDistributor_args thread_args; ///< args for the child thread that produces and distributes jobs to the worker farm + MulticastWorkerMonitoring monitoring_vars; + + static bool MulticastMessageJob(void*& arg); ///< job function that prepares a batch of multicast messages for DB entry + static void MulticastMessageFail(void*& arg); ///< job fail function, perform cleanup to return multicast buffer and job args struct to their respective Pools + + // for now use shared ones in datamodel + //WorkerPoolManager* job_manager=nullptr; ///< manager for worker farm, has internal background thread that spawns new jobs and or prunes them, along with tracking statistics + //JobQueue multicast_jobs; ///< job queue for worker farm + +}; + + + +#endif diff --git a/UserTools/MulticastWorkers/README.md b/UserTools/MulticastWorkers/README.md new file mode 100644 index 0000000..5ae3ac4 --- /dev/null +++ b/UserTools/MulticastWorkers/README.md @@ -0,0 +1,19 @@ +# MulticastWorkers + +MulticastWorkers + +## Data + +Describe any data formats MulticastWorkers creates, destroys, changes, analyzes, or its usage. + + + + +## Configuration + +Describe any configuration variables for MulticastWorkers. 
+ +``` +param1 value1 +param2 value2 +``` diff --git a/UserTools/ReadQueryReceiverReplySender/README.md b/UserTools/ReadQueryReceiverReplySender/README.md new file mode 100644 index 0000000..11b9561 --- /dev/null +++ b/UserTools/ReadQueryReceiverReplySender/README.md @@ -0,0 +1,19 @@ +# ReadReceiverReplySender + +ReadReceiverReplySender + +## Data + +Describe any data formats ReadReceiverReplySender creates, destroys, changes, analyzes, or its usage. + + + + +## Configuration + +Describe any configuration variables for ReadReceiverReplySender. + +``` +param1 value1 +param2 value2 +``` diff --git a/UserTools/ReadQueryReceiverReplySender/ReadQueryReceiverReplySender.cpp b/UserTools/ReadQueryReceiverReplySender/ReadQueryReceiverReplySender.cpp new file mode 100644 index 0000000..32023dd --- /dev/null +++ b/UserTools/ReadQueryReceiverReplySender/ReadQueryReceiverReplySender.cpp @@ -0,0 +1,435 @@ +#include "ReadQueryReceiverReplySender.h" + +ReadQueryReceiverReplySender::ReadQueryReceiverReplySender():Tool(){} + +bool ReadQueryReceiverReplySender::Initialise(std::string configfile, DataModel &data){ + + InitialiseTool(data); + m_configfile = configfile; + InitialiseConfiguration(configfile); + //m_variables.Print(); + + if(!m_variables.Get("verbose",m_verbose)) m_verbose=1; + + /* ----------------------------------------- */ + /* Configuration */ + /* ----------------------------------------- */ + + remote_port_name = "db_read"; + // FIXME do these timeouts need to be << transfer_period_ms? + int rcv_timeout_ms=500; + int snd_timeout_ms=500; + int poll_timeout_ms=500; + int rcv_hwm=10000; // FIXME sufficient? + int conns_backlog=1000; // FIXME sufficient? 
+ int local_buffer_size = 200; + int transfer_period_ms = 200; + + m_variables.Get("remote_port_name", remote_port_name); + m_variables.Get("rcv_hwm", rcv_hwm); // max num outstanding messages in receive buffer + m_variables.Get("conns_backlog", conns_backlog); // max num oustanding connection requests + m_variables.Get("poll_timeout_ms",poll_timeout_ms); + m_variables.Get("snd_timeout_ms",snd_timeout_ms); + m_variables.Get("rcv_timeout_ms",rcv_timeout_ms); + m_variables.Get("local_buffer_size", local_buffer_size); + m_variables.Get("transfer_period_ms", transfer_period_ms); + + ExportConfiguration(); + + /* ----------------------------------------- */ + /* Socket Setup */ + /* ----------------------------------------- */ + + // A ROUTER socket is used for read queries as it naturally load balances + // (since read queries can be handled by both master/slave middlemen and will be round-robined between them) + // and is also used to asynchronously send both read and write query acknowledgements/replies + + ManagedSocket* managed_socket = new ManagedSocket; + managed_socket->service_name=""; // attach to any client type... 
+ managed_socket->remote_port_name = remote_port_name; // ...that advertises a service on port 'remote_port_name' + managed_socket->socket = new zmq::socket_t(*m_data->context, ZMQ_ROUTER); + managed_socket->socket->setsockopt(ZMQ_SNDTIMEO, snd_timeout_ms); + managed_socket->socket->setsockopt(ZMQ_RCVTIMEO, rcv_timeout_ms); + managed_socket->socket->setsockopt(ZMQ_RCVHWM,rcv_hwm); + managed_socket->socket->setsockopt(ZMQ_BACKLOG,conns_backlog); + managed_socket->socket->setsockopt(ZMQ_LINGER, 10); + // make reply socket error, rather than silently drop, if the destination is unreachable + managed_socket->socket->setsockopt(ZMQ_ROUTER_MANDATORY, 1); + // make router transfer connections with an already seen ZMQ_IDENTITY to a new connection + // rather than rejecting the new connection attempt + // FIXME need to update ZMQ version to enable, but we should do this + /* + try{ + managed_socket->socket->setsockopt(ZMQ_ROUTER_HANDOVER, 1); + } catch(std::exception& e){ + std::cout<<"caught "< locker(m_data->managed_sockets_mtx); + m_data->managed_sockets[remote_port_name] = managed_socket; + + /* ----------------------------------------- */ + /* Thread Setup */ + /* ----------------------------------------- */ + + // monitoring struct to encapsulate tracking info + locker =std::unique_lock(m_data->monitoring_variables_mtx); + m_data->monitoring_variables.emplace(m_tool_name, &monitoring_vars); + + thread_args.m_data = m_data; + thread_args.m_tool_name = m_tool_name; + thread_args.monitoring_vars = &monitoring_vars; + thread_args.mgd_sock = managed_socket; + thread_args.poll_timeout_ms = poll_timeout_ms; + thread_args.in_poll = zmq::pollitem_t{*managed_socket->socket,0,ZMQ_POLLIN,0}; + thread_args.out_poll = zmq::pollitem_t{*managed_socket->socket,0,ZMQ_POLLOUT,0}; + thread_args.in_local_queue = m_data->querybatch_pool.GetNew(local_buffer_size); + thread_args.make_new = true; + thread_args.local_buffer_size = local_buffer_size; + thread_args.transfer_period_ms = 
std::chrono::milliseconds{transfer_period_ms}; + thread_args.last_transfer = std::chrono::steady_clock::now(); + + // thread needs a unique name + if(!m_data->utils.CreateThread("readrep_sendreceiver", &Thread, &thread_args)){ + Log("Failed to spawn background thread",v_error,m_verbose); + return false; + } + m_data->num_threads++; + + return true; +} + + +bool ReadQueryReceiverReplySender::Execute(){ + + if(!thread_args.running){ + Log("Execute found thread not running!",v_error); + Finalise(); + Initialise(m_configfile, *m_data); // FIXME should we give up if Initialise returns false? should we set StopLoop to 1? + ++(monitoring_vars.thread_crashes); + } + // FIXME add monitoring info: queue sizes + + return true; +} + + +bool ReadQueryReceiverReplySender::Finalise(){ + + // signal background receiver thread to stop + Log("Joining receiver thread",v_warning); + m_data->utils.KillThread(&thread_args); + Log("thread terminated",v_warning); + m_data->num_threads--; + + std::unique_lock locker(m_data->managed_sockets_mtx); + if(m_data->managed_sockets.count(remote_port_name)){ + ManagedSocket* sock = m_data->managed_sockets[remote_port_name]; + m_data->managed_sockets.erase(remote_port_name); + locker.unlock(); + if(sock->socket) delete sock->socket; // destructor closes socket + delete sock; + } + + locker = std::unique_lock(m_data->monitoring_variables_mtx); + m_data->monitoring_variables.erase(m_tool_name); + + Log("Finished",v_warning); + return true; +} + +// ««-------------- ≪ °◇◆◇° ≫ --------------»» + +void ReadQueryReceiverReplySender::Thread(Thread_args* args){ + + ReadQueryReceiverReplySender_args* m_args = reinterpret_cast(args); + + // transfer to datamodel + // ===================== + if(m_args->in_local_queue->queries.size() >= m_args->local_buffer_size || + (std::chrono::steady_clock::now() - m_args->last_transfer) > m_args->transfer_period_ms){ + + if(!m_args->in_local_queue->queries.empty()){ + + if(!m_args->make_new) 
m_args->in_local_queue->queries.pop_back(); + + //printf("%s adding %ld messages to datamodel\n",m_args->m_tool_name.c_str(),m_args->in_local_queue->queries.size()); + + //m_args->in_local_queue->push_time("receiver_to_DM"); + std::unique_lock locker(m_args->m_data->read_msg_queue_mtx); + m_args->m_data->read_msg_queue.push_back(m_args->in_local_queue); + locker.unlock(); + + m_args->in_local_queue = m_args->m_data->querybatch_pool.GetNew(m_args->local_buffer_size); + + m_args->make_new=true; + ++(m_args->monitoring_vars->in_buffer_transfers); + + } + + m_args->last_transfer = std::chrono::steady_clock::now(); + + } + + + // poll + // ==== + try { + m_args->get_ok=0; + // give priority to socket manager, otherwise we may lock it too frequently and prevent it getting access + while(m_args->mgd_sock->socket_manager_request){ + usleep(1); + } + std::unique_lock locker(m_args->mgd_sock->socket_mtx); + m_args->get_ok = zmq::poll(&m_args->in_poll, 1, m_args->poll_timeout_ms); + } catch(zmq::error_t& err){ + // ignore poll aborting due to signals + if(zmq_errno()==EINTR) return; // this is probably fine + std::cerr<m_tool_name<<" in poll caught "<monitoring_vars->polls_failed); +// m_args->running=false; // FIXME Handle other errors? or just globally via restarting thread? or throw? + usleep(10); + return; + } + catch(std::exception& err){ + std::cerr<m_tool_name<<" in poll caught "<monitoring_vars->polls_failed); +// m_args->running=false; // FIXME Handle other errors? or just globally via restarting thread? or throw? + usleep(10); + return; + } catch(...){ + std::cerr<m_tool_name<<" in poll caught "<monitoring_vars->polls_failed); +// m_args->running=false; // FIXME Handle other errors? or just globally via restarting thread? or throw? + usleep(10); + return; + } + if(m_args->get_ok<0){ + std::cerr<m_tool_name<<" in poll failed with "<monitoring_vars->polls_failed); +// m_args->running=false; // FIXME Handle other errors? or just globally via restarting thread? 
or throw? + usleep(10); + return; + } + + // read + // ==== + if(m_args->in_poll.revents & ZMQ_POLLIN){ + //printf("%s receiving message\n",m_args->m_tool_name.c_str()); + + if(m_args->make_new){ + m_args->in_local_queue->queries.emplace_back(); + m_args->make_new = false; + } + ZmqQuery& msg_buf = m_args->in_local_queue->queries.back(); + msg_buf.parts.resize(4); + // received parts are [client, topic, msg_id, query] + // reorder parts on receipt as client and msg_id will be left untouched and re-used for response + static constexpr char part_order[4] = {0,2,1,3}; + m_args->msg_parts=0; + + // debug only, remove + //msg_buf.times.clear(); + //msg_buf.push_time("recieve"); + + try { + + std::unique_lock locker(m_args->mgd_sock->socket_mtx); + //printf("%s receiving part...",m_args->m_tool_name.c_str()); + do { + m_args->get_ok = m_args->mgd_sock->socket->recv(&msg_buf[part_order[std::min(3,m_args->msg_parts++)]]); + //printf("%d=%d (more: %d),...",m_args->msg_parts,m_args->get_ok,msg_buf[part_order[std::min(3,m_args->msg_parts-1)]].more()); + } while(m_args->get_ok && msg_buf[part_order[std::min(3,m_args->msg_parts-1)]].more()); + locker.unlock(); + //printf("\n"); + + // if the read failed, discard the message + if(!m_args->get_ok){ + + std::cerr<m_tool_name<<" receive failed with "<monitoring_vars->rcv_fails); + + // if there weren't 4 parts, discard the message + } else if(m_args->msg_parts!=4){ + + std::cerr<m_tool_name<<": Unexpected "<msg_parts<<" part message"<msg_parts; ++i){ + char msg_str[msg_buf[part_order[i]].size()]; + snprintf(&msg_str[0], msg_buf[part_order[i]].size()+1, "%s", msg_buf[part_order[i]].data()); + printf("\tpart %d: %s\n",i, msg_str); + } + // FIXME Log this? here? do we add a flag for bad and do it in the processing? 
+ ++(m_args->monitoring_vars->bad_msgs); + + // else success + } else { + + m_args->make_new=true; + ++(m_args->monitoring_vars->msgs_rcvd); + // XXX + //printf("%s received query %u, '%s' message '%s' into ZmqQuery at %p\n",m_args->m_tool_name.c_str(), msg_buf.msg_id(), msg_buf.topic().data(), msg_buf.msg().data(), &msg_buf); + + } + + } catch(zmq::error_t& err){ + // receive aborted due to signals? + if(zmq_errno()==EINTR) return; // FIXME this is probably not appropriate: should resume receive? + std::cerr<m_tool_name<<" receive caught "<monitoring_vars->rcv_fails); +// m_args->running=false; // FIXME Handle other errors? or just globally via restarting thread? or throw? + } catch(std::exception& err){ + std::cerr<m_tool_name<<" receive caught "<monitoring_vars->rcv_fails); +// m_args->running=false; // FIXME Handle other errors? or just globally via restarting thread? or throw? + } catch(...){ + std::cerr<m_tool_name<<" receive caught "<monitoring_vars->rcv_fails); +// m_args->running=false; // FIXME Handle other errors? or just globally via restarting thread? or throw? + } + + } // else no messages from clients + + // write + // ===== + //m_args->m_data->Log("Size of reply queue is "+ + // (m_args->out_local_queue ? std::to_string(m_args->out_local_queue.size()) : std::string{"0"}),10); + + // send next response message, if we have one in the queue + if(m_args->out_local_queue!=nullptr && m_args->out_iout_local_queue->queries.size()){ + + // poll + // ==== + try { + m_args->get_ok=0; + std::unique_lock locker(m_args->mgd_sock->socket_mtx); + m_args->get_ok = zmq::poll(&m_args->out_poll, 1, m_args->poll_timeout_ms); + } catch(zmq::error_t& err){ + // ignore poll aborting due to signals + if(zmq_errno()==EINTR) return; // this is probably fine + std::cerr<m_tool_name<<" out poll caught "<monitoring_vars->polls_failed); + // m_args->running=false; // FIXME Handle other errors? or just globally via restarting thread? or throw? 
+ return; + } + catch(std::exception& err){ + std::cerr<m_tool_name<<" out poll caught "<monitoring_vars->polls_failed); + // m_args->running=false; // FIXME Handle other errors? or just globally via restarting thread? or throw? + return; + } catch(...){ + std::cerr<m_tool_name<<" out poll caught "<monitoring_vars->polls_failed); + // m_args->running=false; // FIXME Handle other errors? or just globally via restarting thread? or throw? + return; + } + if(m_args->get_ok<0){ + std::cerr<m_tool_name<<" out poll failed with "<monitoring_vars->polls_failed); + // m_args->running=false; // FIXME Handle other errors? or just globally via restarting thread? or throw? + return; + } + + // check we had a listener ready + if(m_args->out_poll.revents & ZMQ_POLLOUT){ + + //printf("%s sending reply %d/%d\n",m_args->m_tool_name.c_str(),m_args->out_i,m_args->out_local_queue->queries.size()); + + ZmqQuery& rep = m_args->out_local_queue->queries[m_args->out_i++]; + // FIXME maybe don't pop (increment out_i) until send succeeds? + // FIXME maybe impelement 'retries' mechanism as previously? + + // response parts are [client,msg_id, success, results...] 
+ + //printf("reply to message %u has %d parts\n", rep.msg_id(), rep.size()); + /* + uint32_t turnaround = std::chrono::duration_cast(std::chrono::system_clock::now()-rep.times[0].second).count(); + if(rep.size()>3){ + printf("%s turnaround of %u ms on response '%s' to message %u\n",m_args->m_tool_name.c_str(), turnaround, rep[3].data(), *(uint32_t*)(rep[1].data())); + } else { + printf("%s turnaround of %u ms on ack %u to message %u\n",m_args->m_tool_name.c_str(), turnaround, *(uint32_t*)rep[2].data(), *(uint32_t*)(rep[1].data())); + } + rep.print_times(); + */ + + + try { + + std::unique_lock locker(m_args->mgd_sock->socket_mtx); + for(size_t i=0; iget_ok = m_args->mgd_sock->socket->send(rep[i], ZMQ_SNDMORE); + if(!m_args->get_ok) break; + } + if(m_args->get_ok) m_args->get_ok = m_args->mgd_sock->socket->send(rep[rep.size()-1]); + locker.unlock(); + + if(!m_args->get_ok){ + std::cerr<m_tool_name<<": send failed with "<monitoring_vars->send_fails); // FIXME or move into below if we retry? or track both? + /* + if(next_msg.retries>=max_send_attempts){ + resp_queue.erase(resp_queue.begin()->first); + } else { + ++next_msg.retries; + } + */ + return; + } + // FIXME if we do implement re-sending, then do not do this + rep.parts.resize(0); // safety to prevent accidentally accessing sent messages, which can segfault + + // else success + //printf("%s reply at %p sent\n",m_args->m_tool_name.c_str(), &rep); + ++(m_args->monitoring_vars->msgs_sent); + + } catch(zmq::error_t& err){ + // send aborted due to signals? + if(zmq_errno()==EINTR) return; // FIXME is this appropriate here? + std::cerr<m_tool_name<<" send caught "<monitoring_vars->send_fails); +// m_args->running=false; // FIXME Handle other errors? or just globally via restarting thread? or throw? + } catch(std::exception& e){ + std::cerr<m_tool_name<<" send caught "<monitoring_vars->send_fails); +// m_args->running=false; // FIXME Handle other errors? or just globally via restarting thread? or throw? 
+ } catch(...){ + std::cerr<m_tool_name<<" send caught "<monitoring_vars->send_fails); +// m_args->running=false; // FIXME Handle other errors? or just globally via restarting thread? or throw? + } + + } // else no available listeners + + } else { + + // no responses to send - see if there's any in the DataModel + std::unique_lock locker(m_args->m_data->query_replies_mtx); + if(!m_args->m_data->query_replies.empty()){ + + //printf("%s fetching new replies\n",m_args->m_tool_name.c_str()); + + // return our batch to the pool if applicable + if(m_args->out_local_queue!=nullptr){ + m_args->out_local_queue->queries.clear(); + m_args->m_data->querybatch_pool.Add(m_args->out_local_queue); + m_args->out_local_queue = nullptr; + } + + // grab a new batch + m_args->out_local_queue = m_args->m_data->query_replies.front(); + //m_args->out_local_queue->push_time("reply_fetch"); + m_args->m_data->query_replies.pop_front(); + + ++(m_args->monitoring_vars->out_buffer_transfers); + + // start sending from the beginning + m_args->out_i=0; + } + locker.unlock(); + + } + + return; +} diff --git a/UserTools/ReadQueryReceiverReplySender/ReadQueryReceiverReplySender.h b/UserTools/ReadQueryReceiverReplySender/ReadQueryReceiverReplySender.h new file mode 100644 index 0000000..091f78d --- /dev/null +++ b/UserTools/ReadQueryReceiverReplySender/ReadQueryReceiverReplySender.h @@ -0,0 +1,66 @@ +#ifndef ReadQueryReceiverReplySender_H +#define ReadQueryReceiverReplySender_H + +#include + +#include "Tool.h" +#include "DataModel.h" +#include "ReadReceiveMonitoring.h" + +/** + * \class ReadQueryReceiverReplySender + * + * This Tool gets read queries from a ZMQ ROUTER socket and send replies as well as write query acknowledgements. 
+ * + * $Author: Marcus O'Flaherty $ + * $Date: 2025/11/27 $ + * Contact: marcus.o-flaherty@warwick.ac.uk +*/ + +struct ReadQueryReceiverReplySender_args : public Thread_args { + + std::string m_tool_name; + DataModel* m_data; + ReadReceiveMonitoring* monitoring_vars; + ManagedSocket* mgd_sock=nullptr; + + int poll_timeout_ms; + zmq::pollitem_t in_poll; + zmq::pollitem_t out_poll; + zmq::message_t msg_discard; + bool make_new; + int msg_parts; + int get_ok; + QueryBatch* in_local_queue; + QueryBatch* out_local_queue; + size_t out_i; ///< which query in the batch is next to sent + + // for received buffer transfers + // FIXME we don't track last time of outgoing buffer transfer? + std::chrono::time_point last_transfer; + std::chrono::milliseconds transfer_period_ms; + size_t local_buffer_size; + +}; + +class ReadQueryReceiverReplySender: public Tool { + + public: + ReadQueryReceiverReplySender(); ///< Simple constructor + bool Initialise(std::string configfile,DataModel &data); ///< Initialise Function for setting up Tool resorces. @param configfile The path and name of the dynamic configuration file to read in. @param data A reference to the transient data class used to pass information between Tools. + bool Execute(); ///< Executre function used to perform Tool perpose. + bool Finalise(); ///< Finalise funciton used to clean up resorces. 
+ + private: + static void Thread(Thread_args* args); + ReadQueryReceiverReplySender_args thread_args; + ReadReceiveMonitoring monitoring_vars; + + std::string remote_port_name; // name by which clients advertise sockets for sending read queries to the DB + +}; + + + + +#endif diff --git a/UserTools/ReadQueryReceiverReplySender/ReadReceiveMonitoring.h b/UserTools/ReadQueryReceiverReplySender/ReadReceiveMonitoring.h new file mode 100644 index 0000000..d92b7f8 --- /dev/null +++ b/UserTools/ReadQueryReceiverReplySender/ReadReceiveMonitoring.h @@ -0,0 +1,38 @@ +#ifndef ReadReceiveMonitoring_H +#define ReadReceiveMonitoring_H + +#include "MonitoringVariables.h" + +class ReadReceiveMonitoring : public MonitoringVariables { + public: + ReadReceiveMonitoring(){}; + ~ReadReceiveMonitoring(){}; + + std::atomic polls_failed; // error polling socket + std::atomic rcv_fails; // error in recv_from + std::atomic send_fails; // error in send_to + std::atomic msgs_rcvd; // messages successfully received + std::atomic msgs_sent; // messages successfully received + std::atomic bad_msgs; // messages with the wrong number of zmq parts + std::atomic in_buffer_transfers; // transfers of thread-local message vector to datamodel + std::atomic out_buffer_transfers; // transfers of thread-local message vector to datamodel + std::atomic thread_crashes; // restarts of tool worker thread (main thread found reader thread 'running' was false) + + std::string toJSON(){ + + std::string s="{\"polls_failed\":"+std::to_string(polls_failed.load()) + +",\"rcv_fails\":"+std::to_string(rcv_fails.load()) + +",\"send_fails\":"+std::to_string(send_fails.load()) + +",\"msgs_rcvd\":"+std::to_string(msgs_rcvd.load()) + +",\"bad_msgs\":"+std::to_string(bad_msgs.load()) + +",\"msgs_sent\":"+std::to_string(msgs_sent.load()) + +",\"in_buffer_transfers\":"+std::to_string(in_buffer_transfers.load()) + +",\"out_buffer_transfers\":"+std::to_string(out_buffer_transfers.load()) + 
+",\"thread_crashes\":"+std::to_string(thread_crashes.load()) + +"}"; + + return s; + } +}; + +#endif diff --git a/UserTools/ResultWorkers/README.md b/UserTools/ResultWorkers/README.md new file mode 100644 index 0000000..f094517 --- /dev/null +++ b/UserTools/ResultWorkers/README.md @@ -0,0 +1,19 @@ +# ResultWorkers + +ResultWorkers + +## Data + +Describe any data formats ResultWorkers creates, destroys, changes, analyzes, or its usage. + + + + +## Configuration + +Describe any configuration variables for ResultWorkers. + +``` +param1 value1 +param2 value2 +``` diff --git a/UserTools/ResultWorkers/ResultWorkerMonitoring.h b/UserTools/ResultWorkers/ResultWorkerMonitoring.h new file mode 100644 index 0000000..91228d5 --- /dev/null +++ b/UserTools/ResultWorkers/ResultWorkerMonitoring.h @@ -0,0 +1,33 @@ +#ifndef ResultWorkerMonitoring_H +#define ResultWorkerMonitoring_H + +#include "MonitoringVariables.h" + +class ResultWorkerMonitoring : public MonitoringVariables { + public: + ResultWorkerMonitoring(){}; + ~ResultWorkerMonitoring(){}; + + std::atomic read_batches_processed; + std::atomic write_batches_processed; + std::atomic jobs_failed; + std::atomic jobs_completed; + std::atomic result_access_errors; + + std::atomic thread_crashes; // restarts of tool worker thread (main thread found reader thread 'running' was false) + + std::string toJSON(){ + + std::string s="{\"read_batches_processed\":"+std::to_string(read_batches_processed.load()) + +",\"write_batches_processed\":"+std::to_string(write_batches_processed.load()) + +",\"result_access_errors\":"+std::to_string(result_access_errors.load()) + +",\"jobs_completed\":"+std::to_string(jobs_completed.load()) + +",\"jobs_failed\":"+std::to_string(jobs_failed.load()) + +",\"thread_crashes\":"+std::to_string(thread_crashes.load()) + +"}"; + + return s; + } +}; + +#endif diff --git a/UserTools/ResultWorkers/ResultWorkers.cpp b/UserTools/ResultWorkers/ResultWorkers.cpp new file mode 100644 index 0000000..6f6e5e2 --- 
/dev/null +++ b/UserTools/ResultWorkers/ResultWorkers.cpp @@ -0,0 +1,354 @@ +#include "ResultWorkers.h" + +ResultWorkers::ResultWorkers():Tool(){} + + +bool ResultWorkers::Initialise(std::string configfile, DataModel &data){ + + InitialiseTool(data); + m_configfile = configfile; + InitialiseConfiguration(configfile); + //m_variables.Print(); + + if(!m_variables.Get("verbose",m_verbose)) m_verbose=1; + + ExportConfiguration(); + + // monitoring struct to encapsulate tracking info + std::unique_lock locker(m_data->monitoring_variables_mtx); + m_data->monitoring_variables.emplace(m_tool_name, &monitoring_vars); + + thread_args.m_data = m_data; + thread_args.monitoring_vars = &monitoring_vars; + if(!m_data->utils.CreateThread("result_job_distributor", &Thread, &thread_args)){ + Log("Failed to spawn background thread",v_error,m_verbose); + return false; + } + m_data->num_threads++; + + return true; +} + + +bool ResultWorkers::Execute(){ + + // FIXME ok but actually this kills all our jobs, not just our job distributor + // so we don't want to do that. + if(!thread_args.running){ + Log("Execute found thread not running!",v_error); + Finalise(); + Initialise(m_configfile, *m_data); // FIXME should we give up if Initialise returns false? should we set StopLoop to 1? 
+ ++(monitoring_vars.thread_crashes); + } + + return true; +} + + +bool ResultWorkers::Finalise(){ + + // signal job distributor thread to stop + Log("Joining receiver thread",v_warning); + m_data->utils.KillThread(&thread_args); + m_data->num_threads--; + + std::unique_lock locker(m_data->monitoring_variables_mtx); + m_data->monitoring_variables.erase(m_tool_name); + + Log("Finished",v_warning); + return true; +} + + +void ResultWorkers::Thread(Thread_args* args){ + + ResultJobDistributor_args* m_args = reinterpret_cast(args); + + // grab a batch of read queries, with results awaiting conversion + std::unique_lock locker(m_args->m_data->query_results_mtx); + if(m_args->m_data->query_results.empty()){ + locker.unlock(); + usleep(100); + return; + } + std::swap(m_args->m_data->query_results, m_args->local_msg_queue); + locker.unlock(); + + // add a job for each batch to the queue + for(int i=0; ilocal_msg_queue.size(); ++i){ + + // add a new Job to the job queue to process this data + Job* the_job = m_args->m_data->job_pool.GetNew("result_worker"); + the_job->out_pool = &m_args->m_data->job_pool; + if(the_job->data == nullptr){ + // on first creation of the job, make it a JobStruct to encapsulate its data + // N.B. 
Pool::GetNew will only invoke the constructor if this is a new instance, + // (not if it's been used before and then returned to the pool) + // so don't pass job-specific variables to the constructor + the_job->data = m_args->job_struct_pool.GetNew(&m_args->job_struct_pool, m_args->m_data, m_args->monitoring_vars); + } else { + // FIXME error + std::cerr<<"result_worker Job with non-null data pointer!"<func = ResultJob; + the_job->fail_func = ResultJobFail; + + ResultJobStruct* job_data = static_cast(the_job->data); + job_data->batch = m_args->local_msg_queue[i]; + job_data->m_job_name = "result_worker"; + + //job_data->batch->push_time("result_job_push"); + + m_args->m_data->job_queue.AddJob(the_job); + + } + m_args->local_msg_queue.clear(); + + return; +} + +// ««-------------- ≪ °◇◆◇° ≫ --------------»» + +void ResultWorkers::ResultJobFail(void*& arg){ + + // safety check in case the job somehow fails after returning its args to the pool + if(arg==nullptr){ + std::cerr<<"multicast worker fail with no args"<(arg); + std::cerr<m_job_name<<" failure"<monitoring_vars->jobs_failed); + + // return our job args to the pool + m_args->m_pool->Add(m_args); + m_args = nullptr; // clear the local m_args variable... not strictly necessary + arg = nullptr; // clear the job 'data' member variable + + return; +} + +bool ResultWorkers::ResultJob(void*& arg){ + + ResultJobStruct* m_args = reinterpret_cast(arg); + //m_args->batch->push_time("result_worker_start"); + + // for now each job processes a batch, not a set of batches + //for(QueryBatch* batch : m_args->local_msg_queue){ + + // read queries need to have their results interpreted, + // write queries only need to have their insertion success status returned. + // each batch should only contain messages that are either all read or all write. 
+ if(m_args->batch->queries.front().topic()[0]=='R'){ + + // process batch of read queries + + for(ZmqQuery& query : m_args->batch->queries){ + + // set whether the query succeeded or threw an exception + if(!query.err.empty()){ + query.setsuccess(0); + query.setresponserows(1); + query.setresponse(0, query.err); + + } else { + query.setsuccess(1); + + // returned rows are sent back formatted as JSON, with each row a new zmq::message_t + // resize zmq vector in preparation + query.setresponserows(std::size(query.result)); + // should always be 0 or 1 rows FIXME we could add a check for that here + + // just for good measure, when we try to access the pqxx result, + // enclose within try just in case it throws something + try { + // standard queries generated by the libDAQInterface use `row_to_json` + // to request results already packaged up into one JSON per row + // so all we need to do is copy that into the zmq message + for(size_t i=0; imonitoring_vars->result_access_errors); + } + + // release pqxx::result and clear error + query.Clear(); + + } // if we had a result object + } // loop over queries in this batch + + ++(m_args->monitoring_vars->read_batches_processed); + + } else { + + // process batch of write queries + // these are interleaved but results are grouped by type + size_t devconfig_i = 0; + size_t runconfig_i = 0; + size_t calibration_i = 0; + size_t plotlyplot_i = 0; + size_t rootplot_i = 0; + bool devconfigs_ok = !m_args->batch->devconfig_version_nums.empty(); + bool runconfigs_ok = !m_args->batch->runconfig_version_nums.empty(); + bool calibrations_ok = !m_args->batch->calibration_version_nums.empty(); + bool plotlyplots_ok = !m_args->batch->plotlyplot_version_nums.empty(); + bool rootplots_ok = !m_args->batch->rootplot_version_nums.empty(); + + for(ZmqQuery& query : m_args->batch->queries){ + + switch(query_topic{query.topic()[2]}){ + // alarms return just the success status + case query_topic::alarm: + 
query.setsuccess(m_args->batch->alarm_batch_err.empty()); + query.setresponserows(0); + break; + + // everything else returns a version number + case query_topic::dev_config: + query.setsuccess(devconfigs_ok); + query.setresponserows(1); + if(devconfigs_ok){ + query.setresponse(0, m_args->batch->devconfig_version_nums[devconfig_i++]); + } else { + query.setresponse(0, m_args->batch->devconfig_batch_err); + } + break; + + case query_topic::run_config: + query.setsuccess(runconfigs_ok); + query.setresponserows(1); + if(runconfigs_ok){ + query.setresponse(0, m_args->batch->runconfig_version_nums[runconfig_i++]); + } else { + query.setresponse(0, m_args->batch->runconfig_batch_err); + } + break; + + case query_topic::calibration: + query.setsuccess(calibrations_ok); + query.setresponserows(1); + if(calibrations_ok){ + query.setresponse(0, m_args->batch->calibration_version_nums[calibration_i++]); + } else { + query.setresponse(0, m_args->batch->calibration_batch_err); + } + break; + + case query_topic::plotlyplot: + query.setsuccess(plotlyplots_ok); + query.setresponserows(1); + if(plotlyplots_ok){ + query.setresponse(0, m_args->batch->plotlyplot_version_nums[plotlyplot_i++]); + } else { + query.setresponse(0, m_args->batch->plotlyplot_batch_err); + } + break; + + case query_topic::rootplot: + query.setsuccess(rootplots_ok); + query.setresponserows(1); + if(rootplots_ok){ + query.setresponse(0, m_args->batch->rootplot_version_nums[rootplot_i++]); + } else { + query.setresponse(0, m_args->batch->rootplot_batch_err); + } + break; + + case query_topic::generic: + + if(!query.err.empty()){ + query.setsuccess(0); + query.setresponserows(1); + query.setresponse(0, query.err); + + } else { + query.setsuccess(1); + + try { + // TODO if we can safely shoehorn in a wrapping call to `row_to_json` + // around a user's generic sql, we can combine this with the above. + // But, given the arbitrary complexity of statements, this may not be possible. 
+ // in which case, we need to loop over rows and convert them to JSON manually + query.setresponserows(std::size(query.result)); + for(size_t i=0; itmpval = "{"; + for (pqxx::row::iterator it=query.result[i].begin(); ittmpval += ", "; + m_args->tmpval += "\"" + std::string{it->name()} + "\":"; + // Field values are returned bare: i.e. '3' or 'cat' or '{"iam":"ajson"}' + // but to convert this into JSON, strings need to be quoted: + // i.e. { "field1":3, "field2":"cat", "field3":{"iam":"ajson"} } + // this means we need to add enclosing quotes *only* for string fields + if((it->type()==18) || (it->type()==25) || (it->type()==1042) || (it->type()==1043)){ + m_args->tmpval += "\""+std::string{it->c_str()}+"\""; + } else { + m_args->tmpval += it->c_str(); + } + } + m_args->tmpval += "}"; + + query.setresponse(i, m_args->tmpval); + } + + } catch (std::exception& e){ + std::cerr<<"caught "<monitoring_vars->result_access_errors); + } + } + + break; + + default: + // FIXME corrupted topic, log it. + std::cerr<m_job_name<<" unknown topic "<monitoring_vars->write_batches_processed); + + } // if/else on whether this batch was read/write + + //m_args->batch->push_time("result_done"); + +// } // loop over query batches + + // pass the batch onto the next stage of the pipeline for the DatabaseWorkers + std::unique_lock locker(m_args->m_data->query_replies_mtx); + //m_args->m_data->query_replies.insert(m_args->m_data->query_replies.end(), + // m_args->local_msg_queue.begin(),m_args->local_msg_queue.end()); + m_args->m_data->query_replies.push_back(m_args->batch); + locker.unlock(); + + //printf("%s completed\n",m_args->m_job_name.c_str()); + ++(m_args->monitoring_vars->jobs_completed); + + // return our job args to the pool + m_args->m_pool->Add(m_args); // return our job args to the job args struct pool + m_args = nullptr; // clear the local m_args variable... 
not strictly necessary + arg = nullptr; // clear the job 'data' member variable + + return true; +} + + diff --git a/UserTools/ResultWorkers/ResultWorkers.h b/UserTools/ResultWorkers/ResultWorkers.h new file mode 100644 index 0000000..9a9679d --- /dev/null +++ b/UserTools/ResultWorkers/ResultWorkers.h @@ -0,0 +1,62 @@ +#ifndef ResultWorkers_H +#define ResultWorkers_H + +#include +#include + +#include "Tool.h" +#include "DataModel.h" +#include "ResultWorkerMonitoring.h" + +/** +* \class ResultWorkers +* +* This Tool spawns jobs that convert pqxx::result objects from read queries into zmq::message_t objects ready for sending back to clients +* +* $Author: Marcus O'Flaherty $ +* $Date: 2025/12/10 $ +* Contact: marcus.o-flaherty@warwick.ac.uk +*/ + +// class for things passed to result worker threads +struct ResultJobStruct { + + ResultJobStruct(Pool* pool, DataModel* data, ResultWorkerMonitoring* mon) : m_pool(pool), m_data(data), monitoring_vars(mon){}; + DataModel* m_data; + ResultWorkerMonitoring* monitoring_vars; + Pool* m_pool; + std::string m_job_name; + QueryBatch* batch; + std::stringstream ss; + std::string tmpval; + +}; + +struct ResultJobDistributor_args : Thread_args { + + DataModel* m_data; + ResultWorkerMonitoring* monitoring_vars; + std::vector local_msg_queue; // swap with datamodel and then pass out to jobs + Pool job_struct_pool{true, 1000, 100}; ///< pool for job args structs // FIXME default args + +}; + +class ResultWorkers: public Tool { + + public: + ResultWorkers(); ///< Simple constructor + bool Initialise(std::string configfile,DataModel &data); ///< Initialise Function for setting up Tool resources. @param configfile The path and name of the dynamic configuration file to read in. @param data A reference to the transient data class used to pass information between Tools. + bool Execute(); ///< Execute function used to perform Tool purpose. + bool Finalise(); ///< Finalise function used to clean up resources. 
+ + private: + static void Thread(Thread_args* args); + ResultJobDistributor_args thread_args; ///< args for the child thread that makes jobs for the job queue + ResultWorkerMonitoring monitoring_vars; + + static bool ResultJob(void*& arg); + static void ResultJobFail(void*& args); + +}; + +#endif diff --git a/UserTools/Sleep/README.md b/UserTools/Sleep/README.md new file mode 100644 index 0000000..28e8dfa --- /dev/null +++ b/UserTools/Sleep/README.md @@ -0,0 +1 @@ +# DAQFramework diff --git a/UserTools/Sleep/Sleep.cpp b/UserTools/Sleep/Sleep.cpp new file mode 100644 index 0000000..4420f5a --- /dev/null +++ b/UserTools/Sleep/Sleep.cpp @@ -0,0 +1,37 @@ +#include "Sleep.h" + +Sleep::Sleep():Tool(){} + + +bool Sleep::Initialise(std::string configfile, DataModel &data){ + + InitialiseTool(data); + m_configfile = configfile; + InitialiseConfiguration(configfile); + //m_variables.Print(); + + ExportConfiguration(); + + unsigned int period_ms = 10; + m_variables.Get("period_ms",period_ms); + toolchain_period_ms = std::chrono::milliseconds{period_ms}; + + last_execute = std::chrono::steady_clock::now(); + + return true; +} + + +bool Sleep::Execute(){ + + std::this_thread::sleep_until(last_execute+toolchain_period_ms); + last_execute = std::chrono::steady_clock::now(); + + return true; +} + + +bool Sleep::Finalise(){ + + return true; +} diff --git a/UserTools/Sleep/Sleep.h b/UserTools/Sleep/Sleep.h new file mode 100644 index 0000000..d2667a3 --- /dev/null +++ b/UserTools/Sleep/Sleep.h @@ -0,0 +1,34 @@ +#ifndef Sleep_H +#define Sleep_H + +#include +#include + +#include "Tool.h" +#include "DataModel.h" + +/** +* \class Sleep +* +* This Tool simply sleeps to throttle the rate of the main ToolChain Execute loop, to prevent the main thread pegging a CPU core. It may be useful for highly threaded toolchains which do minimal work in Execute functions. 
+* +* $Author: Marcus O'Flaherty $ +* $Date: 2026/09/01 $ +*/ + +class Sleep: public Tool { + + public: + Sleep(); ///< Simple constructor + bool Initialise(std::string configfile,DataModel &data); ///< Initialise Function for setting up Tool resources. @param configfile The path and name of the dynamic configuration file to read in. @param data A reference to the transient data class used to pass information between Tools. + bool Execute(); ///< Execute function used to perform Tool purpose + bool Finalise(); ///< Finalise function used to clean up resources. + + private: + std::chrono::time_point last_execute; + std::chrono::milliseconds toolchain_period_ms; + +}; + + +#endif diff --git a/UserTools/SocketManager/README.md b/UserTools/SocketManager/README.md new file mode 100644 index 0000000..2fc073f --- /dev/null +++ b/UserTools/SocketManager/README.md @@ -0,0 +1,19 @@ +# SocketManager + +SocketManager + +## Data + +Describe any data formats SocketManager creates, destroys, changes, analyzes, or its usage. + + + + +## Configuration + +Describe any configuration variables for SocketManager. 
+ +``` +param1 value1 +param2 value2 +``` diff --git a/UserTools/SocketManager/SocketManager.cpp b/UserTools/SocketManager/SocketManager.cpp new file mode 100644 index 0000000..dbd6a0a --- /dev/null +++ b/UserTools/SocketManager/SocketManager.cpp @@ -0,0 +1,142 @@ +#include "SocketManager.h" + +SocketManager::SocketManager():Tool(){} + + +bool SocketManager::Initialise(std::string configfile, DataModel &data){ + + InitialiseTool(data); + m_configfile = configfile; + InitialiseConfiguration(configfile); + //m_variables.Print(); + + m_verbose=1; + int update_ms=2000; + + m_variables.Get("verbose",m_verbose); + m_variables.Get("update_ms",update_ms); + + ExportConfiguration(); + + std::unique_lock locker(m_data->monitoring_variables_mtx); + m_data->monitoring_variables.emplace(m_tool_name, &monitoring_vars); + + // for doing UpdateConnections + daq_utils = DAQUtilities(m_data->context); + + thread_args.m_data = m_data; + thread_args.monitoring_vars = &monitoring_vars; + thread_args.daq_utils = &daq_utils; + thread_args.update_period_ms = std::chrono::milliseconds{update_ms}; + thread_args.last_update = std::chrono::steady_clock::now(); + thread_mtx.lock(); + thread_args.thread_mtx = &thread_mtx; + + if(!m_data->utils.CreateThread("socket_manager", &Thread, &thread_args)){ + Log("Failed to spawn background thread",v_error,m_verbose); + return false; + } + m_data->num_threads++; + + m_data->sc_vars.Add("Clients", SlowControlElementType::INFO, nullptr, nullptr); // INFO type doesnt need read fnct + + return true; +} + + +bool SocketManager::Execute(){ + + if(!thread_args.running){ + Log("Execute found thread not running!",v_error); + Finalise(); + Initialise(m_configfile, *m_data); // FIXME should we give up if Initialise returns false? should we set StopLoop to 1? 
+ ++(monitoring_vars.thread_crashes); + } + + return true; +} + + +bool SocketManager::Finalise(){ + + // signal job distributor thread to stop + Log("Joining socket manager thread",v_warning); + thread_args.running=false; + thread_mtx.unlock(); + m_data->utils.KillThread(&thread_args); + m_data->num_threads--; + + std::unique_lock locker(m_data->monitoring_variables_mtx); + m_data->monitoring_variables.erase(m_tool_name); + + Log("Finished",v_warning); + return true; +} + +void SocketManager::Thread(Thread_args* args){ + + SocketManager_args* m_args = dynamic_cast(args); + + //printf("SocketManager checking for new clients after %lu ms\n",std::chrono::duration_cast(std::chrono::steady_clock::now()-m_args->last_update).count()); + m_args->last_update = std::chrono::steady_clock::now(); + + bool new_clients=false; + + std::unique_lock container_locker(m_args->m_data->managed_sockets_mtx); + for(std::pair mgd_sock : m_args->m_data->managed_sockets){ + + ManagedSocket* sock = mgd_sock.second; + + std::unique_lock locker(sock->socket_mtx, std::defer_lock); + if(!locker.try_lock()){ + sock->socket_manager_request=true; + locker.lock(); + sock->socket_manager_request=false; + } + + int new_conn_count = std::abs((long long int)sock->connections.size() - m_args->daq_utils->UpdateConnections(sock->service_name, sock->socket, sock->connections, "", sock->remote_port_name)); + locker.unlock(); + + if(new_conn_count!=0){ + //m_args->m_data->services->SendLog(std::to_string(std::abs(new_conn_count))+" new connections to "+sock->service_name, v_message); // FIXME logging + printf("%d new %s connections made!\n",new_conn_count, sock->remote_port_name.c_str()); + new_clients = true; + + // update the list of clients so they can be queried + for(std::pair& aservice : sock->connections){ + if(!m_args->clientsmap.count(aservice.first)){ + m_args->clientsmap.emplace(aservice.first,sock->service_name); + } else { + m_args->clientsmap.at(aservice.first)+= ", "+sock->service_name; + } + 
} + + } + + } + container_locker.unlock(); + + if(new_clients){ + + std::string clientlist; + for(std::pair& aclient : m_args->clientsmap){ + if(!clientlist.empty()) clientlist+="\n"; + clientlist += aclient.first+": "+aclient.second; + } + if(clientlist.size()>0){ + // if client list is non-empty, remove trailing newline and set as slow control indicator + clientlist.pop_back(); + m_args->m_data->sc_vars["Clients"]->SetValue(clientlist); + } + + } + + //std::this_thread::sleep_until(m_args->last_update+m_args->update_period_ms); + std::unique_lock timed_locker(*m_args->thread_mtx, std::defer_lock); + timed_locker.try_lock_until(m_args->last_update+m_args->update_period_ms); + + return; + +} + + diff --git a/UserTools/SocketManager/SocketManager.h b/UserTools/SocketManager/SocketManager.h new file mode 100644 index 0000000..77abdc7 --- /dev/null +++ b/UserTools/SocketManager/SocketManager.h @@ -0,0 +1,54 @@ +#ifndef SocketManager_H +#define SocketManager_H + +#include +#include + +#include "Tool.h" +#include "DataModel.h" +#include "SocketManagerMonitoring.h" + +/** +* \class SocketManager +* +* This Tool uses the DAQUtils class to periodically find new clients advertising relevant services and make new connections to their respective zmq sockets. +* +* $Author: Marcus O'Flaherty $ +* $Date: 2025/12/11 $ +* Contact: marcus.o-flaherty@warwick.ac.uk +*/ + +struct SocketManager_args : public Thread_args { + + DataModel* m_data; + SocketManagerMonitoring* monitoring_vars; + DAQUtilities* daq_utils; + std::map clientsmap; + + std::chrono::time_point last_update; + std::chrono::milliseconds update_period_ms; + + std::timed_mutex* thread_mtx; + +}; + +class SocketManager: public Tool { + + public: + SocketManager(); ///< Simple constructor + bool Initialise(std::string configfile,DataModel &data); ///< Initialise Function for setting up Tool resorces. @param configfile The path and name of the dynamic configuration file to read in. 
@param data A reference to the transient data class used to pass information between Tools. + bool Execute(); ///< Execute function used to perform Tool purpose. + bool Finalise(); ///< Finalise function used to clean up resources. + + private: + DAQUtilities daq_utils{nullptr}; + static void Thread(Thread_args* args); + SocketManager_args thread_args; + SocketManagerMonitoring monitoring_vars; + + std::timed_mutex thread_mtx; + +}; + + +#endif diff --git a/UserTools/SocketManager/SocketManagerMonitoring.h b/UserTools/SocketManager/SocketManagerMonitoring.h new file mode 100644 index 0000000..d82875d --- /dev/null +++ b/UserTools/SocketManager/SocketManagerMonitoring.h @@ -0,0 +1,23 @@ +#ifndef SocketManagerMonitoring_H +#define SocketManagerMonitoring_H + +#include "MonitoringVariables.h" + +class SocketManagerMonitoring : public MonitoringVariables { + public: + SocketManagerMonitoring(){}; + ~SocketManagerMonitoring(){}; + + // TODO add more monitoring + std::atomic thread_crashes; // restarts of tool worker thread (main thread found reader thread 'running' was false) + + std::string toJSON(){ + + std::string s="{\"thread_crashes\":"+std::to_string(thread_crashes.load()) + +"}"; + + return s; + } +}; + +#endif diff --git a/UserTools/Unity.h b/UserTools/Unity.h index a38d466..2de0217 100644 --- a/UserTools/Unity.h +++ b/UserTools/Unity.h @@ -1 +1,16 @@ #include +#include "MulticastReceiverSender.h" +#include "MulticastWorkers.h" +#include "DatabaseWorkers.h" +#include "WriteQueryReceiver.h" +#include "ReadQueryReceiverReplySender.h" +#include "WriteWorkers.h" +#include "Monitoring.h" +#include "SocketManager.h" +#include "ResultWorkers.h" +#include "JobManager.h" +/* +#include "QueueTrimmer.h" +#include "MiddlemanNegotiate.h" +*/ +#include "Sleep.h" diff --git a/UserTools/WriteQueryReceiver/README.md b/UserTools/WriteQueryReceiver/README.md new file mode 100644 index 0000000..c36a581 --- /dev/null +++ b/UserTools/WriteQueryReceiver/README.md @@ -0,0 +1,19 @@ +# 
WriteQueryReceiver + +WriteQueryReceiver + +## Data + +Describe any data formats WriteQueryReceiver creates, destroys, changes, analyzes, or its usage. + + + + +## Configuration + +Describe any configuration variables for WriteQueryReceiver. + +``` +param1 value1 +param2 value2 +``` diff --git a/UserTools/WriteQueryReceiver/WriteQueryReceiver.cpp b/UserTools/WriteQueryReceiver/WriteQueryReceiver.cpp new file mode 100644 index 0000000..f9eb39b --- /dev/null +++ b/UserTools/WriteQueryReceiver/WriteQueryReceiver.cpp @@ -0,0 +1,303 @@ +#include "WriteQueryReceiver.h" + +WriteQueryReceiver::WriteQueryReceiver():Tool(){} + + +bool WriteQueryReceiver::Initialise(std::string configfile, DataModel &data){ + + InitialiseTool(data); + m_configfile = configfile; + InitialiseConfiguration(configfile); + //m_variables.Print(); + + /* ----------------------------------------- */ + /* Configuration */ + /* ----------------------------------------- */ + +// am_master = true; // FIXME not sure being used any more + m_verbose=1; + remote_port_name = "db_write"; + // FIXME do these timeouts need to be << transfer_period_ms? + int poll_timeout_ms = 500; + int rcv_timeout_ms = 500; + int transfer_period_ms = 200; + int local_buffer_size = 200; + int rcv_hwm=10000; // FIXME sufficient? + int conns_backlog=1000; // FIXME sufficient? 
+ + m_variables.Get("verbose",m_verbose); + m_variables.Get("remote_port_name", remote_port_name); + m_variables.Get("rcv_hwm", rcv_hwm); // max num outstanding messages in receive buffer + m_variables.Get("conns_backlog", conns_backlog); // max num oustanding connection requests + m_variables.Get("poll_timeout_ms",poll_timeout_ms); + m_variables.Get("rcv_timeout_ms",rcv_timeout_ms); + m_variables.Get("local_buffer_size", local_buffer_size); + m_variables.Get("transfer_period_ms", transfer_period_ms); +// m_variables.Get("am_master", am_master); + + ExportConfiguration(); + + /* ----------------------------------------- */ + /* Socket Setup */ + /* ----------------------------------------- */ + + // Write queries are received via a SUB socket so they get to both middlemen - only the master runs the query. + // acknowledgements and any 'returning' results are sent on the ROUTER socket used for receiving read queries + + // socket to receive published write queries from clients + // ------------------------------------------------------- + ManagedSocket* managed_socket = new ManagedSocket; + managed_socket->service_name=""; // attach to any client type... + managed_socket->remote_port_name = remote_port_name; // ...that advertises a service on port 'remote_port_name' + managed_socket->socket = new zmq::socket_t(*m_data->context, ZMQ_SUB); + // this socket never sends, so a send timeout is irrelevant. + managed_socket->socket->setsockopt(ZMQ_RCVTIMEO, rcv_timeout_ms); + // don't linger too long, it looks like the program crashed. 
+ managed_socket->socket->setsockopt(ZMQ_LINGER, 10); + managed_socket->socket->setsockopt(ZMQ_SUBSCRIBE,"",0); + managed_socket->socket->setsockopt(ZMQ_RCVHWM,rcv_hwm); + managed_socket->socket->setsockopt(ZMQ_BACKLOG,conns_backlog); + + // add the socket to the datamodel for the SocketManager, which will handle making new connections to clients + std::unique_lock locker(m_data->managed_sockets_mtx); + m_data->managed_sockets[remote_port_name] = managed_socket; + + /* ----------------------------------------- */ + /* Thread Setup */ + /* ----------------------------------------- */ + + // monitoring struct to encapsulate tracking info + locker = std::unique_lock(m_data->monitoring_variables_mtx); + m_data->monitoring_variables.emplace(m_tool_name, &monitoring_vars); + + thread_args.m_data = m_data; + thread_args.m_tool_name = m_tool_name; + thread_args.monitoring_vars = &monitoring_vars; + thread_args.mgd_sock = managed_socket; + thread_args.poll_timeout_ms = poll_timeout_ms; + thread_args.poll = zmq::pollitem_t{*managed_socket->socket, 0, ZMQ_POLLIN, 0}; + thread_args.in_local_queue = m_data->querybatch_pool.GetNew(local_buffer_size); + thread_args.local_buffer_size = local_buffer_size; + thread_args.transfer_period_ms = std::chrono::milliseconds{transfer_period_ms}; + thread_args.last_transfer = std::chrono::steady_clock::now(); + thread_args.make_new = true; + + // thread needs a unique name + if(!m_data->utils.CreateThread("write_query_receiver", &Thread, &thread_args)){ + Log("Failed to spawn background thread",v_error,m_verbose); + return false; + } + m_data->num_threads++; + + return true; +} + +bool WriteQueryReceiver::Execute(){ + + if(!thread_args.running){ + Log("Execute found thread not running!",v_error); + Finalise(); + Initialise(m_configfile, *m_data); // FIXME should we give up if Initialise returns false? should we set StopLoop to 1? 
+ ++(monitoring_vars.thread_crashes); + } + + /* + FIXME are we doing this + if(am_master != am_master_last){ + if(m_data->am_master) Promote(); + else Demote(); + } + */ + + return true; +} + + +bool WriteQueryReceiver::Finalise(){ + + // signal background receiver thread to stop + Log("Joining receiver thread",v_warning); + m_data->utils.KillThread(&thread_args); + Log("receiver thread terminated",v_warning); + m_data->num_threads--; + + if(m_data->managed_sockets.count(remote_port_name)){ + std::unique_lock locker(m_data->managed_sockets_mtx); + ManagedSocket* sock = m_data->managed_sockets[remote_port_name]; + m_data->managed_sockets.erase(remote_port_name); + locker.unlock(); + if(sock->socket) delete sock->socket; // destructor closes socket + delete sock; + } + + std::unique_lock locker(m_data->monitoring_variables_mtx); + m_data->monitoring_variables.erase(m_tool_name); + + Log("Finished",v_warning); + return true; +} + +void WriteQueryReceiver::Thread(Thread_args* args){ + + WriteQueryReceiver_args* m_args = reinterpret_cast(args); + + // transfer to datamodel + // ===================== + if(m_args->in_local_queue->queries.size() >= m_args->local_buffer_size || + (std::chrono::steady_clock::now() - m_args->last_transfer) > m_args->transfer_period_ms){ + + if(!m_args->in_local_queue->queries.empty()){ + + if(!m_args->make_new) m_args->in_local_queue->queries.pop_back(); + + //printf("%s adding %ld messages to datamodel\n",m_args->m_tool_name.c_str(),m_args->in_local_queue->queries.size()); + + //m_args->in_local_queue->push_time("receiver_to_DM"); + + std::unique_lock locker(m_args->m_data->write_msg_queue_mtx); + m_args->m_data->write_msg_queue.push_back(m_args->in_local_queue); + locker.unlock(); + + m_args->in_local_queue = m_args->m_data->querybatch_pool.GetNew(m_args->local_buffer_size); + + m_args->make_new=true; + ++(m_args->monitoring_vars->in_buffer_transfers); + + } + + m_args->last_transfer = std::chrono::steady_clock::now(); + + } + + // poll + 
// ==== + try { + // give priority to socketmanager + while(m_args->mgd_sock->socket_manager_request){ + usleep(1); + } + std::unique_lock locker(m_args->mgd_sock->socket_mtx); + m_args->get_ok = zmq::poll(&m_args->poll, 1, m_args->poll_timeout_ms); + + if(m_args->get_ok<0){ + std::cerr<m_tool_name<<" poll failed with "<monitoring_vars->polls_failed); +// m_args->running=false; // FIXME Handle other errors? or just globally via restarting thread? or throw? + return; + } + + } catch(zmq::error_t& err){ + // ignore poll aborting due to signals + if(zmq_errno()==EINTR) return; + std::cerr<m_tool_name<<" poll caught "<monitoring_vars->polls_failed); +// m_args->running=false; // FIXME Handle other errors? or just globally via restarting thread? or throw? + return; + } catch(std::exception& err){ + std::cerr<m_tool_name<<" poll caught "<monitoring_vars->polls_failed); +// m_args->running=false; // FIXME Handle other errors? or just globally via restarting thread? or throw? + return; + } catch(...){ + std::cerr<m_tool_name<<" poll caught "<monitoring_vars->polls_failed); +// m_args->running=false; // FIXME Handle other errors? or just globally via restarting thread? or throw? 
+ return; + } + + // read + // ==== + if(m_args->poll.revents & ZMQ_POLLIN){ + //printf("%s receiving message\n",m_args->m_tool_name.c_str()); + + if(m_args->make_new){ + m_args->in_local_queue->queries.emplace_back(); + m_args->make_new = false; + } + ZmqQuery& msg_buf = m_args->in_local_queue->queries.back(); + msg_buf.parts.resize(4); + // received parts are [topic, client, msg_id, query] + // reorder parts on receipt as client and msg_id will be left untouched and re-used for response + static constexpr char part_order[4] = {2,0,1,3}; + m_args->msg_parts=0; + + // for debug only + //msg_buf.times.clear(); + //msg_buf.push_time("receive"); + + try { + + std::unique_lock locker(m_args->mgd_sock->socket_mtx); + //printf("%s receiving part...",m_args->m_tool_name.c_str()); + do { + m_args->get_ok = m_args->mgd_sock->socket->recv(&msg_buf[part_order[std::min(3,m_args->msg_parts++)]]); + //printf("%d=%d (more: %d),...",m_args->msg_parts,m_args->get_ok,msg_buf[part_order[std::min(3,m_args->msg_parts-1)]].more()); + } while(m_args->get_ok && msg_buf[part_order[std::min(3,m_args->msg_parts-1)]].more()); + locker.unlock(); + //printf("\n"); + + // if receive failed, discard the message + if(!m_args->get_ok){ + std::cerr<m_tool_name<<": receive failed with "<monitoring_vars->rcv_fails); +// m_args->running=false; // FIXME Handle other errors? or just globally via restarting thread? or throw? 
+ return; + } + + // if there weren't 4 parts, discard the message + if(m_args->msg_parts!=4){ + std::cerr<m_tool_name<<": Unexpected "<msg_parts<<" part message"<msg_parts; ++i){ + char msg_str[msg_buf[part_order[i]].size()]; + snprintf(&msg_str[0], msg_buf[part_order[i]].size()+1, "%s", msg_buf[part_order[i]].data()); + printf("\tpart %d: %s\n",i, msg_str); + } + ++(m_args->monitoring_vars->bad_msgs); + return; + } + + // else success + m_args->make_new=true; + ++(m_args->monitoring_vars->msgs_rcvd); + // XXX + //printf("%s received query %u, '%s' message '%s' into ZmqQuery at %p\n",m_args->m_tool_name.c_str(), msg_buf.msg_id(), msg_buf.topic().data(), msg_buf.msg().data(), &msg_buf); + + } catch(zmq::error_t& err){ + // receive aborted due to signals? + if(zmq_errno()==EINTR) return; // FIXME is this appropriate here? + std::cerr<m_tool_name<<" receive caught "<monitoring_vars->rcv_fails); +// m_args->running=false; // FIXME Handle other errors? or just globally via restarting thread? or throw? + return; + } catch(std::exception& err){ + std::cerr<m_tool_name<<" receive caught "<monitoring_vars->rcv_fails); +// m_args->running=false; // FIXME Handle other errors? or just globally via restarting thread? or throw? + return; + } catch(...){ + std::cerr<m_tool_name<<" receive caught "<monitoring_vars->rcv_fails); +// m_args->running=false; // FIXME Handle other errors? or just globally via restarting thread? or throw? 
+ return; + } + + + } // else no messages from clients + + return; +} + +/* +bool WriteQueryReceiver::Promote(){ + // FIXME TODO if using a standby, need to connect to clients +} + +bool WriteQueryReceiver::Demote(){ + // FIXME TODO if using a standby, need to disconnect from clients + // (to prevent zmq buffering messages, and avoid load of unnecessarily reading them) +} +*/ diff --git a/UserTools/WriteQueryReceiver/WriteQueryReceiver.h b/UserTools/WriteQueryReceiver/WriteQueryReceiver.h new file mode 100644 index 0000000..59633cc --- /dev/null +++ b/UserTools/WriteQueryReceiver/WriteQueryReceiver.h @@ -0,0 +1,65 @@ +#ifndef WriteQueryReceiver_H +#define WriteQueryReceiver_H + +#include +#include + +#include "Tool.h" +#include "DataModel.h" +#include "WriteReceiveMonitoring.h" + +/** +* \class WriteQueryReceiver +* +* This Tool receives Write queries from clients over a ZMQ_SUB socket and pushes them to the DataModel +* +* $Author: M. O'Flaherty $ +* $Date: 2025/11/26 $ +* Contact: marcus.o-flaherty@warwick.ac.uk +*/ + + +struct WriteQueryReceiver_args : public Thread_args { + + std::string m_tool_name; + DataModel* m_data; + WriteReceiveMonitoring* monitoring_vars; + ManagedSocket* mgd_sock=nullptr; + + int poll_timeout_ms; + zmq::pollitem_t poll; + zmq::message_t msg_discard; + bool make_new; + int msg_parts; + int get_ok; + QueryBatch* in_local_queue; + + std::chrono::time_point last_transfer; + std::chrono::milliseconds transfer_period_ms; + size_t local_buffer_size; + +}; + +class WriteQueryReceiver: public Tool { + + public: + WriteQueryReceiver(); ///< Simple constructor + bool Initialise(std::string configfile,DataModel &data); ///< Initialise Function for setting up Tool resources. @param configfile The path and name of the dynamic configuration file to read in. @param data A reference to the transient data class used to pass information between Tools. + bool Execute(); ///< Execute function used to perform Tool purpose. 
+ bool Finalise(); ///< Finalise funciton used to clean up resorces + + private: + static void Thread(Thread_args* args); + WriteQueryReceiver_args thread_args; + WriteReceiveMonitoring monitoring_vars; + + std::string remote_port_name; // name by which clients advertise sockets for sending write queries to the DB + + bool am_master; + //bool Promote(); ///< Connect to clients to start receiving messages, if we became master + //bool Demote(); ///< Disconnect from clients to stop receiving & processing messages, if we are no longer master + + +}; + +#endif diff --git a/UserTools/WriteQueryReceiver/WriteReceiveMonitoring.h b/UserTools/WriteQueryReceiver/WriteReceiveMonitoring.h new file mode 100644 index 0000000..1d6f5ab --- /dev/null +++ b/UserTools/WriteQueryReceiver/WriteReceiveMonitoring.h @@ -0,0 +1,32 @@ +#ifndef WriteReceiveMonitoring_H +#define WriteReceiveMonitoring_H + +#include "MonitoringVariables.h" + +class WriteReceiveMonitoring : public MonitoringVariables { + public: + WriteReceiveMonitoring(){}; + ~WriteReceiveMonitoring(){}; + + std::atomic polls_failed; // error polling socket + std::atomic rcv_fails; // error in recv_from + std::atomic msgs_rcvd; // messages successfully received + std::atomic bad_msgs; // messages with the wrong number of zmq parts + std::atomic in_buffer_transfers; // transfers of thread-local message vector to datamodel + std::atomic thread_crashes; // restarts of tool worker thread (main thread found reader thread 'running' was false) + + std::string toJSON(){ + + std::string s="{\"polls_failed\":"+std::to_string(polls_failed.load()) + +",\"rcv_fails\":"+std::to_string(rcv_fails.load()) + +",\"msgs_rcvd\":"+std::to_string(msgs_rcvd.load()) + +",\"bad_msgs\":"+std::to_string(bad_msgs.load()) + +",\"in_buffer_transfers\":"+std::to_string(in_buffer_transfers.load()) + +",\"thread_crashes\":"+std::to_string(thread_crashes.load()) + +"}"; + + return s; + } +}; + +#endif diff --git a/UserTools/WriteWorkers/README.md 
b/UserTools/WriteWorkers/README.md new file mode 100644 index 0000000..d5890a0 --- /dev/null +++ b/UserTools/WriteWorkers/README.md @@ -0,0 +1,19 @@ +# ReadWriteWorkers + +ReadWriteWorkers + +## Data + +Describe any data formats ReadWriteWorkers creates, destroys, changes, analyzes, or its usage. + + + + +## Configuration + +Describe any configuration variables for ReadWriteWorkers. + +``` +param1 value1 +param2 value2 +``` diff --git a/UserTools/WriteWorkers/WriteWorkerMonitoring.h b/UserTools/WriteWorkers/WriteWorkerMonitoring.h new file mode 100644 index 0000000..59c7b07 --- /dev/null +++ b/UserTools/WriteWorkers/WriteWorkerMonitoring.h @@ -0,0 +1,28 @@ +#ifndef WriteWorkerMonitoring_H +#define WriteWorkerMonitoring_H + +#include "MonitoringVariables.h" + +class WriteWorkerMonitoring : public MonitoringVariables { + public: + WriteWorkerMonitoring(){}; + ~WriteWorkerMonitoring(){}; + + std::atomic jobs_failed; + std::atomic jobs_completed; + std::atomic msgs_processed; // each job concatenates a batch of messages; this sums all batches + std::atomic thread_crashes; // restarts of tool worker thread (main thread found reader thread 'running' was false) + + std::string toJSON(){ + + std::string s="{\"jobs_failed\":"+std::to_string(jobs_failed.load()) + +",\"jobs_completed\":"+std::to_string(jobs_completed.load()) + +",\"msgs_processed\":"+std::to_string(msgs_processed.load()) + +",\"thread_crashes\":"+std::to_string(thread_crashes.load()) + +"}"; + + return s; + } +}; + +#endif diff --git a/UserTools/WriteWorkers/WriteWorkers.cpp b/UserTools/WriteWorkers/WriteWorkers.cpp new file mode 100644 index 0000000..a74ffe0 --- /dev/null +++ b/UserTools/WriteWorkers/WriteWorkers.cpp @@ -0,0 +1,237 @@ +#include "WriteWorkers.h" + +WriteWorkers::WriteWorkers():Tool(){} + + +bool WriteWorkers::Initialise(std::string configfile, DataModel &data){ + + InitialiseTool(data); + m_configfile = configfile; + InitialiseConfiguration(configfile); + //m_variables.Print(); + + 
if(!m_variables.Get("verbose",m_verbose)) m_verbose=1; + + ExportConfiguration(); + + // monitoring struct to encapsulate tracking info + std::unique_lock locker(m_data->monitoring_variables_mtx); + m_data->monitoring_variables.emplace(m_tool_name, &monitoring_vars); + + thread_args.m_data = m_data; + thread_args.monitoring_vars = &monitoring_vars; + if(!m_data->utils.CreateThread("write_job_distributor", &Thread, &thread_args)){ + Log("Failed to spawn background thread",v_error,m_verbose); + return false; + } + m_data->num_threads++; + + return true; +} + + +bool WriteWorkers::Execute(){ + + // FIXME ok but actually this kills all our jobs, not just our job distributor + // so we don't want to do that. + if(!thread_args.running){ + Log("Execute found thread not running!",v_error); + Finalise(); + Initialise(m_configfile, *m_data); // FIXME should we give up if Initialise returns false? should we set StopLoop to 1? + ++(monitoring_vars.thread_crashes); + } + + return true; +} + + +bool WriteWorkers::Finalise(){ + + // signal job distributor thread to stop + Log("Joining job distributor thread",v_warning); + m_data->utils.KillThread(&thread_args); + m_data->num_threads--; + + std::unique_lock locker(m_data->monitoring_variables_mtx); + m_data->monitoring_variables.erase(m_tool_name); + + Log("Finished",v_warning); + return true; +} + + +void WriteWorkers::Thread(Thread_args* args){ + + WriteJobDistributor_args* m_args = dynamic_cast(args); + m_args->local_msg_queue.clear(); + + // grab a batch of write queries + std::unique_lock locker(m_args->m_data->write_msg_queue_mtx); + if(!m_args->m_data->write_msg_queue.empty()){ + std::swap(m_args->m_data->write_msg_queue, m_args->local_msg_queue); + } else { + locker.unlock(); + usleep(100); + return; + } + locker.unlock(); + + // add a job for each batch to the queue + for(int i=0; ilocal_msg_queue.size(); ++i){ + + // add a new Job to the job queue to process this data + Job* the_job = 
m_args->m_data->job_pool.GetNew("write_worker"); + the_job->out_pool = &m_args->m_data->job_pool; + if(the_job->data == nullptr){ + // on first creation of the job, make it a JobStruct to encapsulate its data + // N.B. Pool::GetNew will only invoke the constructor if this is a new instance, + // (not if it's been used before and then returned to the pool) + // so don't pass job-specific variables to the constructor + the_job->data = m_args->job_struct_pool.GetNew(&m_args->job_struct_pool, m_args->m_data, m_args->monitoring_vars); + } else { + // this should never happen as jobs should return their args to the pool + std::cerr<<"WriteWorker Job with non-null data pointer!"<(the_job->data); + job_data->local_msg_queue = m_args->local_msg_queue[i]; + job_data->m_job_name = "write_worker"; + + //printf("spawning %s job\n", job_data->m_job_name.c_str()); + the_job->func = WriteMessageJob; + the_job->fail_func = WriteMessageFail; + + m_args->m_data->job_queue.AddJob(the_job); + //job_data->local_msg_queue->push_time("write_job_push"); + + } + + return; +} + +// ««-------------- ≪ °◇◆◇° ≫ --------------»» + +void WriteWorkers::WriteMessageFail(void*& arg){ + + // safety check in case the job somehow fails after returning its args to the pool + if(arg==nullptr){ + std::cerr<<"multicast worker fail with no args"< back somewhere for the failures + // to be reported to the clients + //m_args->m_data->query_buffer_pool.Add(m_args->msg_buffer); << FIXME not back to the pool but reply queue + + WriteJobStruct* m_args=static_cast(arg); + std::cerr<m_job_name<<" failure"<monitoring_vars->jobs_failed); + + // return our job args to the pool + m_args->m_pool->Add(m_args); + m_args = nullptr; // clear the local m_args variable... 
not strictly necessary + arg = nullptr; // clear the job 'data' member variable + + return; +} + +bool WriteWorkers::WriteMessageJob(void*& arg){ + + WriteJobStruct* m_args = static_cast(arg); + + //m_args->local_msg_queue->push_time("writeworker_start"); + + //printf("%s job processing %d queries\n", m_args->m_job_name.c_str(), m_args->local_msg_queue->queries.size()); + + m_args->local_msg_queue->reset(); + + // pull next query from batch + for(size_t i=0; ilocal_msg_queue->queries.size(); ++i){ + + ZmqQuery& query = m_args->local_msg_queue->queries[i]; + + // we can only batch queries destined for the same table, + // so we need to split our messages up into different queues + // (this also means we can prioritise high priority queries such as alarms) + // we can do batch insertions with a 'returning version' statement to obtain + // a multi-record response with all the corresponding version numbers: e.g. + // INSERT INTO rootplots ( time, name, data ) SELECT * FROM jsonb_to_recordset + // ('[ {"time":"2025-12-05 23:31", "name":"dev1", "data":{"message":"blah"} }, + // {"time":"2025-12-05 23:25", "name":"dev2", "data":{"message":"argg"} } ]') + // as t(time timestamptz, name text, data jsonb) returning version;" + // as before, such batches need to be grouped according to destination table + switch(query_topic{query.topic()[2]}){ + case query_topic::alarm: + // alarm insertions require no return value, + // but we still need to send back an acknowledgement once the alarm is inserted + m_args->out_buffer = &m_args->local_msg_queue->alarm_buffer; + break; + case query_topic::dev_config: + m_args->out_buffer = &m_args->local_msg_queue->devconfig_buffer; + break; + case query_topic::run_config: + m_args->out_buffer = &m_args->local_msg_queue->runconfig_buffer; + break; + case query_topic::calibration: + m_args->out_buffer = &m_args->local_msg_queue->calibration_buffer; + break; + case query_topic::plotlyplot: + m_args->out_buffer = 
&m_args->local_msg_queue->plotlyplot_buffer; + break; + case query_topic::rootplot: + m_args->out_buffer = &m_args->local_msg_queue->rootplot_buffer; + break; + case query_topic::generic: + // these can't be buffered, just note their indices for the DB workers + m_args->local_msg_queue->generic_query_indices.push_back(i); + continue; + break; + default: + std::cerr<<"unrecognised topic '"<out_buffer->length()>1) (*m_args->out_buffer) += ", "; + (*m_args->out_buffer) += query.msg(); + + ++(m_args->monitoring_vars->msgs_processed); + + } + + // add closing ']' to any batch queries + m_args->local_msg_queue->close(); + + //m_args->local_msg_queue->push_time("writeworker_done"); + + // pass the batch onto the next stage of the pipeline for the DatabaseWorkers + std::unique_lock locker(m_args->m_data->write_query_queue_mtx); + m_args->m_data->write_query_queue.push_back(m_args->local_msg_queue); + locker.unlock(); + + //printf("%s queueing processed querybatch\n",m_args->m_job_name.c_str()); + ++(m_args->monitoring_vars->jobs_completed); + + // return our job args to the pool + m_args->m_pool->Add(m_args); // return our job args to the job args struct pool + m_args = nullptr; // clear the local m_args variable... not strictly necessary + arg = nullptr; // clear the job 'data' member variable + + return true; +} + + diff --git a/UserTools/WriteWorkers/WriteWorkers.h b/UserTools/WriteWorkers/WriteWorkers.h new file mode 100644 index 0000000..33bd050 --- /dev/null +++ b/UserTools/WriteWorkers/WriteWorkers.h @@ -0,0 +1,63 @@ +#ifndef WriteWorkers_H +#define WriteWorkers_H + +#include + +#include "Tool.h" +#include "DataModel.h" +#include "WriteWorkerMonitoring.h" + +/** +* \class WriteWorkers +* +* This Tool uses a worker pool to process write queries, converting received messages (structs encapsulating batches of zmq::message_t) into a format suitable for the DatabaseWorkers (array of JSONs). +* +* $Author: M. 
O'Flaherty $ +* $Date: 2025/12/04 $ +* Contact: marcus.o-flaherty@warwick.ac.uk +*/ + +// class for things passed to multicast worker threads +struct WriteJobStruct { + + WriteJobStruct(Pool* pool, DataModel* data, WriteWorkerMonitoring* mon) : m_pool(pool), m_data(data), monitoring_vars(mon){}; + DataModel* m_data; + WriteWorkerMonitoring* monitoring_vars; + Pool* m_pool; + std::string m_job_name; + QueryBatch* local_msg_queue; + std::string* out_buffer; + +}; + +struct WriteJobDistributor_args : Thread_args { + + DataModel* m_data; + WriteWorkerMonitoring* monitoring_vars; + std::string m_job_name; + std::vector local_msg_queue; // swap with datamodel and then pass out to jobs + // maybe we can use shared_ptr instead of a job args pool? - only useful for jobs retaining their args, + // i.e. job queues of a single type of job. + Pool job_struct_pool{true, 1000, 100}; ///< pool for job args structs // FIXME default args + +}; + +class WriteWorkers: public Tool { + + public: + WriteWorkers(); ///< Simple constructor + bool Initialise(std::string configfile,DataModel &data); ///< Initialise Function for setting up Tool resorces. @param configfile The path and name of the dynamic configuration file to read in. @param data A reference to the transient data class used to pass information between Tools. + bool Execute(); ///< Execute function used to perform Tool purpose. + bool Finalise(); ///< Finalise function used to clean up resources. 
+ + private: + static void Thread(Thread_args* args); + WriteJobDistributor_args thread_args; ///< args for the child thread that makes jobs for the job queue + WriteWorkerMonitoring monitoring_vars; + + static bool WriteMessageJob(void*& arg); + static void WriteMessageFail(void*& arg); + +}; + +#endif diff --git a/UserTools/template/MyTool.cpp b/UserTools/template/MyTool.cpp index 6f9802b..2b68799 100644 --- a/UserTools/template/MyTool.cpp +++ b/UserTools/template/MyTool.cpp @@ -4,15 +4,16 @@ MyTool::MyTool():Tool(){} bool MyTool::Initialise(std::string configfile, DataModel &data){ - - if(configfile!="") m_variables.Initialise(configfile); + + InitialiseTool(data); + m_configfile = configfile; + InitialiseConfiguration(configfile); //m_variables.Print(); - m_data= &data; - m_log= m_data->Log; + //your code here - if(!m_variables.Get("verbose",m_verbose)) m_verbose=1; + ExportConfiguration(); return true; } diff --git a/UserTools/template/MyTool.h b/UserTools/template/MyTool.h index 4210f7f..a2638be 100644 --- a/UserTools/template/MyTool.h +++ b/UserTools/template/MyTool.h @@ -5,7 +5,7 @@ #include #include "Tool.h" - +#include "DataModel.h" /** * \class MyTool @@ -14,8 +14,8 @@ * * $Author: B.Richards $ * $Date: 2019/05/28 10:44:00 $ -* Contact: b.richards@qmul.ac.uk */ + class MyTool: public Tool { diff --git a/UserTools/template/MyToolDynamicMultiThread.cpp b/UserTools/template/MyToolDynamicMultiThread.cpp index 51f11f7..4c56d0b 100644 --- a/UserTools/template/MyToolDynamicMultiThread.cpp +++ b/UserTools/template/MyToolDynamicMultiThread.cpp @@ -10,22 +10,21 @@ MyToolDynamicMultiThread::MyToolDynamicMultiThread():Tool(){} bool MyToolDynamicMultiThread::Initialise(std::string configfile, DataModel &data){ - if(configfile!="") m_variables.Initialise(configfile); + InitialiseTool(data); + m_configfile = configfile; + InitialiseConfiguration(configfile); //m_variables.Print(); - - m_data= &data; - m_log= m_data->Log; - + if(!m_variables.Get("verbose",m_verbose)) 
m_verbose=1; - m_util=new Utilities(m_data->context); + m_util=new Utilities(); m_threadnum=0; CreateThread(); m_freethreads=1; - + ExportConfiguration(); return true; } @@ -33,9 +32,9 @@ bool MyToolDynamicMultiThread::Initialise(std::string configfile, DataModel &dat bool MyToolDynamicMultiThread::Execute(){ - for(int i=0; ibusy==0){ - std::cout<<"reply="<message<busy=1; break; @@ -44,8 +43,8 @@ bool MyToolDynamicMultiThread::Execute(){ } m_freethreads=0; - int lastfree=0; - for(int i=0; ibusy==0){ m_freethreads++; lastfree=i; @@ -55,9 +54,10 @@ bool MyToolDynamicMultiThread::Execute(){ if(m_freethreads<1) CreateThread(); if(m_freethreads>1) DeleteThread(lastfree); - std::cout<<"free threads="<KillThread(args.at(i)); + for(unsigned int i=0;iKillThread(args.at(pos)); delete args.at(pos); args.at(pos)=0; - args.erase(args.begin()+(pos-1)); + args.erase(args.begin()+(pos)); } diff --git a/UserTools/template/MyToolDynamicMultiThread.h b/UserTools/template/MyToolDynamicMultiThread.h index 6e5d480..a0606ad 100644 --- a/UserTools/template/MyToolDynamicMultiThread.h +++ b/UserTools/template/MyToolDynamicMultiThread.h @@ -5,6 +5,7 @@ #include #include "Tool.h" +#include "DataModel.h" /** * \struct MyToolDynamicMultiThread_args @@ -14,7 +15,6 @@ d and so will be thread safe * * $Author: B.Richards $ * $Date: 2019/05/28 10:44:00 $ -* Contact: b.richards@qmul.ac.uk */ struct MyToolDynamicMultiThread_args:Thread_args{ @@ -33,8 +33,8 @@ struct MyToolDynamicMultiThread_args:Thread_args{ * * $Author: B.Richards $ * $Date: 2019/05/28 10:44:00 $ - * Contact: b.richards@qmul.ac.uk */ + class MyToolDynamicMultiThread: public Tool { @@ -49,13 +49,13 @@ class MyToolDynamicMultiThread: public Tool { private: void CreateThread(); ///< Function to Create Thread - void DeleteThread(int pos); ///< Function to delete thread @param pos is the position in the args vector below + void DeleteThread(unsigned int pos); ///< Function to delete thread @param pos is the position in the args vector 
below static void Thread(Thread_args* arg); ///< Function to be run by the thread in a loop. Make sure not to block in it Utilities* m_util; ///< Pointer to utilities class to help with threading std::vector args; ///< Vector of thread args (also holds pointers to the threads) - int m_freethreads; ///< Keeps track of free threads + unsigned int m_freethreads; ///< Keeps track of free threads unsigned long m_threadnum; ///< Counter for unique naming of threads }; diff --git a/UserTools/template/MyToolMultiThread.cpp b/UserTools/template/MyToolMultiThread.cpp index b68a09f..d7a400c 100644 --- a/UserTools/template/MyToolMultiThread.cpp +++ b/UserTools/template/MyToolMultiThread.cpp @@ -10,20 +10,19 @@ MyToolMultiThread::MyToolMultiThread():Tool(){} bool MyToolMultiThread::Initialise(std::string configfile, DataModel &data){ - if(configfile!="") m_variables.Initialise(configfile); + InitialiseTool(data); + m_configfile = configfile; + InitialiseConfiguration(configfile); //m_variables.Print(); - m_data= &data; - m_log= m_data->Log; - if(!m_variables.Get("verbose",m_verbose)) m_verbose=1; - int threadcount=0; + unsigned int threadcount=0; if(!m_variables.Get("Threads",threadcount)) threadcount=4; - m_util=new Utilities(m_data->context); + m_util=new Utilities(); - for(int i=0;ibusy=0; tmparg->message=""; @@ -35,7 +34,7 @@ bool MyToolMultiThread::Initialise(std::string configfile, DataModel &data){ m_freethreads=threadcount; - + ExportConfiguration(); return true; } @@ -43,9 +42,9 @@ bool MyToolMultiThread::Initialise(std::string configfile, DataModel &data){ bool MyToolMultiThread::Execute(){ - for(int i=0; ibusy==0){ - std::cout<<"reply="<message<busy=1; break; @@ -54,13 +53,14 @@ bool MyToolMultiThread::Execute(){ } m_freethreads=0; - for(int i=0; ibusy==0) m_freethreads++; } - std::cout<<"free threads="<KillThread(args.at(i)); + for(unsigned int i=0;iKillThread(args.at(i)); args.clear(); diff --git a/UserTools/template/MyToolMultiThread.h 
b/UserTools/template/MyToolMultiThread.h index f8fa067..0c5e16d 100644 --- a/UserTools/template/MyToolMultiThread.h +++ b/UserTools/template/MyToolMultiThread.h @@ -5,6 +5,7 @@ #include #include "Tool.h" +#include "DataModel.h" /** * \struct MyToolMultiThread_args @@ -13,7 +14,6 @@ * * $Author: B.Richards $ * $Date: 2019/05/28 10:44:00 $ - * Contact: b.richards@qmul.ac.uk */ struct MyToolMultiThread_args:Thread_args{ @@ -32,7 +32,6 @@ struct MyToolMultiThread_args:Thread_args{ * * $Author: B.Richards $ * $Date: 2019/05/28 10:44:00 $ - * Contact: b.richards@qmul.ac.uk */ class MyToolMultiThread: public Tool { @@ -52,7 +51,7 @@ class MyToolMultiThread: public Tool { Utilities* m_util; ///< Pointer to utilities class to help with threading std::vector args; ///< Vector of thread args (also holds pointers to the threads) - int m_freethreads; ///< Keeps track of free threads + unsigned int m_freethreads; ///< Keeps track of free threads }; diff --git a/UserTools/template/MyToolServiceAdd.cpp b/UserTools/template/MyToolServiceAdd.cpp index 307fb64..35a5a4f 100644 --- a/UserTools/template/MyToolServiceAdd.cpp +++ b/UserTools/template/MyToolServiceAdd.cpp @@ -5,16 +5,15 @@ MyToolServiceAdd::MyToolServiceAdd():Tool(){} bool MyToolServiceAdd::Initialise(std::string configfile, DataModel &data){ - if(configfile!="") m_variables.Initialise(configfile); - //m_variables.Print(); + InitialiseTool(data); + InitialiseConfiguration(configfile); - m_data= &data; - m_log= m_data->Log; + //m_variables.Print(); if(!m_variables.Get("verbose",m_verbose)) m_verbose=1; if(!m_variables.Get("Port",m_port)) m_port=5555; - m_util=new Utilities(m_data->context); + m_util=new DAQUtilities(m_data->context); sock = new zmq::socket_t(*(m_data->context), ZMQ_DEALER); @@ -25,6 +24,8 @@ bool MyToolServiceAdd::Initialise(std::string configfile, DataModel &data){ if (!m_util->AddService("MyService",m_port,false)) return false; + ExportConfiguration(); + return true; } diff --git 
a/UserTools/template/MyToolServiceAdd.h b/UserTools/template/MyToolServiceAdd.h index cb09d7d..d65f51c 100644 --- a/UserTools/template/MyToolServiceAdd.h +++ b/UserTools/template/MyToolServiceAdd.h @@ -5,6 +5,7 @@ #include #include "Tool.h" +#include "DataModel.h" /** * \class MyToolServiceAdd @@ -13,7 +14,6 @@ * * $Author: B.Richards $ * $Date: 2019/05/28 10:44:00 $ -* Contact: b.richards@qmul.ac.uk */ class MyToolServiceAdd: public Tool { @@ -28,7 +28,7 @@ class MyToolServiceAdd: public Tool { private: - Utilities* m_util; ///< Pointer to utilities class to help with threading + DAQUtilities* m_util; ///< Pointer to utilities class to help with threading zmq::socket_t* sock; ///< zmq socket pointer for socket to advertise int m_port; ///< Port to advertise diff --git a/UserTools/template/MyToolThread.cpp b/UserTools/template/MyToolThread.cpp index b90a7e1..2e8cec8 100644 --- a/UserTools/template/MyToolThread.cpp +++ b/UserTools/template/MyToolThread.cpp @@ -10,18 +10,19 @@ MyToolThread::MyToolThread():Tool(){} bool MyToolThread::Initialise(std::string configfile, DataModel &data){ - if(configfile!="") m_variables.Initialise(configfile); + InitialiseTool(data); + m_configfile = configfile; + InitialiseConfiguration(configfile); //m_variables.Print(); - m_data= &data; - m_log= m_data->Log; - if(!m_variables.Get("verbose",m_verbose)) m_verbose=1; - m_util=new Utilities(m_data->context); + m_util=new Utilities(); args=new MyToolThread_args(); m_util->CreateThread("test", &Thread, args); + + ExportConfiguration(); return true; } diff --git a/UserTools/template/MyToolThread.h b/UserTools/template/MyToolThread.h index 9faeb1c..af9439e 100644 --- a/UserTools/template/MyToolThread.h +++ b/UserTools/template/MyToolThread.h @@ -5,6 +5,7 @@ #include #include "Tool.h" +#include "DataModel.h" /** * \struct MyToolThread_args_args @@ -13,7 +14,6 @@ * * $Author: B.Richards $ * $Date: 2019/05/28 10:44:00 $ - * Contact: b.richards@qmul.ac.uk */ struct MyToolThread_args:Thread_args{ 
@@ -30,7 +30,6 @@ struct MyToolThread_args:Thread_args{ * * $Author: B.Richards $ * $Date: 2019/05/28 10:44:00 $ -* Contact: b.richards@qmul.ac.uk */ class MyToolThread: public Tool { diff --git a/UserTools/template/MyToolZMQMultiThread.cpp b/UserTools/template/MyToolZMQMultiThread.cpp index 12c27cc..69ca5ef 100644 --- a/UserTools/template/MyToolZMQMultiThread.cpp +++ b/UserTools/template/MyToolZMQMultiThread.cpp @@ -1,6 +1,6 @@ #include "MyToolZMQMultiThread.h" -MyToolZMQMultiThread_args::MyToolZMQMultiThread_args():Thread_args(){} +MyToolZMQMultiThread_args::MyToolZMQMultiThread_args():DAQThread_args(){} MyToolZMQMultiThread_args::~MyToolZMQMultiThread_args(){} @@ -10,18 +10,17 @@ MyToolZMQMultiThread::MyToolZMQMultiThread():Tool(){} bool MyToolZMQMultiThread::Initialise(std::string configfile, DataModel &data){ - if(configfile!="") m_variables.Initialise(configfile); - //m_variables.Print(); + InitialiseTool(data); + InitialiseConfiguration(configfile); - m_data= &data; - m_log= m_data->Log; + //m_variables.Print(); if(!m_variables.Get("verbose",m_verbose)) m_verbose=1; int threadcount=0; if(!m_variables.Get("Threads",threadcount)) threadcount=4; - m_util=new Utilities(m_data->context); + m_util=new DAQUtilities(m_data->context); ManagerSend=new zmq::socket_t(*m_data->context,ZMQ_PUSH); ManagerSend->bind("inproc://MyToolZMQMultiThreadSend"); @@ -62,7 +61,7 @@ bool MyToolZMQMultiThread::Initialise(std::string configfile, DataModel &data){ m_freethreads=threadcount; - + ExportConfiguration(); return true; } @@ -77,7 +76,7 @@ bool MyToolZMQMultiThread::Execute(){ zmq::message_t message; ManagerReceive->recv(&message); std::istringstream iss(static_cast(message.data())); - std::cout<<"reply = "<KillThread(args.at(i)); delete args.at(i)->ThreadSend; diff --git a/UserTools/template/MyToolZMQMultiThread.h b/UserTools/template/MyToolZMQMultiThread.h index 2b26844..de30253 100644 --- a/UserTools/template/MyToolZMQMultiThread.h +++ 
b/UserTools/template/MyToolZMQMultiThread.h @@ -5,6 +5,7 @@ #include #include "Tool.h" +#include "DataModel.h" /** * \struct ZMQMyToolMultiThread_args @@ -14,11 +15,10 @@ d and so will be thread safe * * $Author: B.Richards $ * $Date: 2019/05/28 10:44:00 $ -* Contact: b.richards@qmul.ac.uk */ -struct MyToolZMQMultiThread_args:Thread_args{ +struct MyToolZMQMultiThread_args:DAQThread_args{ MyToolZMQMultiThread_args(); ~MyToolZMQMultiThread_args(); @@ -38,7 +38,6 @@ struct MyToolZMQMultiThread_args:Thread_args{ * * $Author: B.Richards $ * $Date: 2019/05/28 10:44:00 $ - * Contact: b.richards@qmul.ac.uk */ class MyToolZMQMultiThread: public Tool { @@ -54,7 +53,7 @@ class MyToolZMQMultiThread: public Tool { private: static void Thread(Thread_args* arg); ///< Function to be run by the thread in a loop. Make sure not to block in it - Utilities* m_util; ///< Pointer to utilities class to help with threading + DAQUtilities* m_util; ///< Pointer to utilities class to help with threading std::vector args; ///< Vector of thread args (also holds pointers to the threads) zmq::pollitem_t items[2]; ///< This is used to both inform the poll and store its output. Allows for multitasking sockets diff --git a/UserTools/template/README.md b/UserTools/template/README.md index e920b73..28e8dfa 100644 --- a/UserTools/template/README.md +++ b/UserTools/template/README.md @@ -1,19 +1 @@ -# MyTool - -MyTool - -## Data - -Describe any data formats MyTool creates, destroys, changes, analyzes, or its usage. - - - - -## Configuration - -Describe any configuration variables for MyTool. 
- -``` -param1 value1 -param2 value2 -``` +# DAQFramework diff --git a/configfiles/Dummy/ToolChainConfig b/configfiles/Dummy/ToolChainConfig index 3ff5f77..533cb97 100644 --- a/configfiles/Dummy/ToolChainConfig +++ b/configfiles/Dummy/ToolChainConfig @@ -1,30 +1,59 @@ #ToolChain dynamic setup file ##### Runtime Paramiters ##### -verbose 9 -error_level 0 # 0= do not exit, 1= exit on unhandeled errors only, 2= exit on unhandeled errors and handeled errors -attempt_recover 1 -remote_port 24004 -IO_Threads 1 ## Number of threads for network traffic (~ 1/Gbps) +UUID_path ./UUID # UUID_path for fixed UUID, if doesnt exist file will be generated. Remove option for always random +verbose 9 # Verbosity level of ToolChain +error_level 0 # 0= do not exit, 1= exit on unhandeled errors only, 2= exit on unhandeled errors and handeled errors +attempt_recover 1 # 1= will attempt to finalise if an execute fails, 0= will not +remote_port 24004 # port to open for remote commands if running in remote mode +IO_Threads 1 # Number of threads for network traffic (~ 1/Gbps) +alerts_send 1 # enable ability to send global alerts +alert_send_port 12242 # port to send global alerts +alerts_receive 1 # enable ability to receive global alerts +alert_receive_port 12243 # port to receive global alerts +sc_port 60000 # port for slow control ###### Logging ##### -log_mode Interactive # Interactive=cout , Remote= remote logging system "serservice_name Remote_Logging" , Local = local file log; -log_local_path ./log -log_service LogStore -log_port 24010 +log_interactive 1 # Interactive=cout; 0=false, 1= true +log_local 1 # Local = local file log; 0=false, 1= true +log_local_path ./log # file to store logs to if local is active +log_remote 1 # Remote= remote logging system "serservice_name Remote_Logging"; 0=false, 1= true +log_address 239.192.1.2 # Remote multicast address to send logs +log_port 5000 # port on remote machine to connect to +log_append_time 0 # append seconds since epoch to filename; 
0=false, 1= true +log_split_files 1 # seperate output and error log files (named x.o and x.e) ###### Service discovery ##### -service_discovery_address 239.192.1.1 -service_discovery_port 5000 -service_name main_service2 -service_publish_sec 5 -service_kick_sec 60 +service_discovery_address 239.192.1.1 # multicast address to use for service discovery +service_discovery_port 5000 # port to use for service discovery +service_name main_service2 # name of Toolchain service to braodcast +service_publish_sec 5 # heartbeat send period +service_kick_sec 60 # remove hosts with no heartbeat after given period + + +###### Backend Services ##### NEWLY ADDED NEEDS TO HAVE PROPER DESCRIPTIONS AND SOME PRUNING BEFORE RELEASE +use_backend_services 1 # +db_name daq # +verbosity 1 # +max_retries 3 # +advertise_endpoints 0 # +resend_period_ms 1000 # +print_stats_period_ms 1000 # +clt_pub_port 55556 # +clt_dlr_port 55555 # +clt_pub_socket_timeout 500 # +clt_dlr_socket_timeout 500 # +inpoll_timeout 50 # keep these short! +outpoll_timeout 50 # keep these short! +command_timeout 2000 # +mon_port 5000 # +mon_address 239.192.1.3 # ##### Tools To Add ##### -Tools_File configfiles/Dummy/ToolsConfig +Tools_File configfiles/Dummy/ToolsConfig # list of tools to run and their config files ##### Run Type ##### -Inline 1 -Interactive 0 -Remote 0 +Inline 1 # number of Execute steps in program, -1 infinite loop that is ended by user +Interactive 0 # set to 1 if you want to run the code interactively +Remote 0 # set to 1 if you want to run the code remotely diff --git a/configfiles/middleman/DatabaseWorkersConfig b/configfiles/middleman/DatabaseWorkersConfig new file mode 100644 index 0000000..3a1abaf --- /dev/null +++ b/configfiles/middleman/DatabaseWorkersConfig @@ -0,0 +1,8 @@ +verbose 2 +hostname /tmp +#hostaddr 127.0.0.1 +dbname daq +port 5432 +user root # fall back to PGUSER env var if not defined? FIXME better user? 
+#passwd +max_workers 10 diff --git a/configfiles/middleman/DummyToolConfig b/configfiles/middleman/DummyToolConfig new file mode 100644 index 0000000..95cad88 --- /dev/null +++ b/configfiles/middleman/DummyToolConfig @@ -0,0 +1,3 @@ +# Dummy config file + +verbose 2 \ No newline at end of file diff --git a/configfiles/middleman/JobManagerConfig b/configfiles/middleman/JobManagerConfig new file mode 100644 index 0000000..4166aac --- /dev/null +++ b/configfiles/middleman/JobManagerConfig @@ -0,0 +1,4 @@ +verbose 2 +#thread_cap 100 # default: 80% of detected CPUs +#global_thread_cap # default: == thread_cap +self_serving 1 diff --git a/configfiles/middleman/LoggingReceiveSenderConfig b/configfiles/middleman/LoggingReceiveSenderConfig new file mode 100644 index 0000000..5cace3a --- /dev/null +++ b/configfiles/middleman/LoggingReceiveSenderConfig @@ -0,0 +1,8 @@ +verbose 2 +type logging +multicast_address 239.192.1.2 +port 5000 +local_buffer_size 200 +transfer_period_ms 100 +poll_timeout_ms 20 + diff --git a/configfiles/middleman/MonitoringConfig b/configfiles/middleman/MonitoringConfig new file mode 100644 index 0000000..1c1264c --- /dev/null +++ b/configfiles/middleman/MonitoringConfig @@ -0,0 +1,2 @@ +verbose 2 +monitoring_period_ms 60000 diff --git a/configfiles/middleman/MonitoringReceiveSenderConfig b/configfiles/middleman/MonitoringReceiveSenderConfig new file mode 100644 index 0000000..54e9459 --- /dev/null +++ b/configfiles/middleman/MonitoringReceiveSenderConfig @@ -0,0 +1,8 @@ +verbose 2 +type monitoring +multicast_address 239.192.1.3 +port 5000 +local_buffer_size 200 +transfer_period_ms 100 +poll_timeout_ms 20 + diff --git a/configfiles/middleman/MulticastWorkersConfig b/configfiles/middleman/MulticastWorkersConfig new file mode 100644 index 0000000..384c877 --- /dev/null +++ b/configfiles/middleman/MulticastWorkersConfig @@ -0,0 +1 @@ +verbose 2 diff --git a/configfiles/middleman/README.md b/configfiles/middleman/README.md new file mode 100644 index 
0000000..5afa52c --- /dev/null +++ b/configfiles/middleman/README.md @@ -0,0 +1,25 @@ +# Configure files + +*********************** +#Description +********************** + +Configure files are simple text files for passing variables to the Tools. + +Text files are read by the Store class (src/Store) and automatically asigned to an internal map for the relavent Tool to use. + + +************************ +#Useage +************************ + +Any line starting with a "#" will be ignored by the Store, as will blank lines. + +Variables should be stored one per line as follows: + + +Name Value #Comments + + +Note: Only one value is permitted per name and they are stored in a string stream and templated cast back to the type given. + diff --git a/configfiles/middleman/ReadQueryReceiverReplySenderConfig b/configfiles/middleman/ReadQueryReceiverReplySenderConfig new file mode 100644 index 0000000..5e674e5 --- /dev/null +++ b/configfiles/middleman/ReadQueryReceiverReplySenderConfig @@ -0,0 +1,10 @@ +verbose 2 +port_name db_read +rcv_hwm 10000 +conns_backlog 30000 +poll_timeout_ms 10 +snd_timeout_ms 10 +rcv_timeout_ms 10 +local_buffer_size 200 +transfer_period_ms 200 + diff --git a/configfiles/middleman/ResultWorkersConfig b/configfiles/middleman/ResultWorkersConfig new file mode 100644 index 0000000..caf332a --- /dev/null +++ b/configfiles/middleman/ResultWorkersConfig @@ -0,0 +1,2 @@ +verbose 2 + diff --git a/configfiles/middleman/SleepConfig b/configfiles/middleman/SleepConfig new file mode 100644 index 0000000..1ab4c09 --- /dev/null +++ b/configfiles/middleman/SleepConfig @@ -0,0 +1,2 @@ +verbose 1 +period_ms 10 diff --git a/configfiles/middleman/SocketManagerConfig b/configfiles/middleman/SocketManagerConfig new file mode 100644 index 0000000..f9014b8 --- /dev/null +++ b/configfiles/middleman/SocketManagerConfig @@ -0,0 +1,2 @@ +verbose 2 +update_ms 2000 diff --git a/configfiles/middleman/ToolChainConfig b/configfiles/middleman/ToolChainConfig new file mode 100644 index 
0000000..107c0de --- /dev/null +++ b/configfiles/middleman/ToolChainConfig @@ -0,0 +1,56 @@ +#ToolChain dynamic setup file + +##### Runtime Paramiters ##### +UUID_path ./UUID # UUID_path for fixed UUID, if doesnt exist file will be generated. Remove option for always random +verbose 2 # Verbosity level of ToolChain +error_level 0 # 0= do not exit, 1= exit on unhandeled errors only, 2= exit on unhandeled errors and handeled errors +attempt_recover 1 # 1= will attempt to finalise if an execute fails, 0= will not +remote_port 24002 # port to open for remote commands if running in remote mode +IO_Threads 1 # Number of threads for network traffic (~ 1/Gbps) +alerts_send 1 # enable ability to send global alerts +alert_send_port 12252 # port to send global alerts +alerts_receive 1 # enable ability to receive global alerts +alert_receive_port 12253 # port to receive global alerts +sc_port 65000 # port for slow control + +###### Logging ##### +log_interactive 1 # Interactive=cout; 0=false, 1= true +log_local 0 # Local = local file log; 0=false, 1= true +log_local_path ./log # file to store logs to if local is active +log_remote 0 # Remote= remote logging system "serservice_name Remote_Logging"; 0=false, 1= true +log_address 239.192.1.2 # Remote multicast address to send logs +log_port 5000 # port on remote machine to connect to +log_append_time 0 # append seconds since epoch to filename; 0=false, 1= true +log_split_files 0 # seperate output and error log files (named x.o and x.e) + +###### Service discovery ##### Ignore these settings for local analysis +service_discovery_address 239.192.1.1 # multicast address to use for service discovery +service_discovery_port 5000 # port to use for service discovery +service_name middleman # name of Toolchain service to braodcast +service_publish_sec 5 # heartbeat send period +service_kick_sec 60 # remove hosts with no heartbeat after given period + +###### Backend Services ##### NEWLY ADDED NEEDS TO HAVE PROPER DESCRIPTIONS AND SOME 
PRUNING BEFORE RELEASE +use_backend_services 0 # +verbosity 1 # +max_retries 3 # +resend_period_ms 1000 # +print_stats_period_ms 1000 # +clt_pub_port 55556 # +clt_dlr_port 55555 # +clt_pub_socket_timeout 500 # +clt_dlr_socket_timeout 500 # +inpoll_timeout 50 # keep these short! +outpoll_timeout 50 # keep these short! +command_timeout 2000 # +mon_port 5000 # +mon_address 239.192.1.3 # + +##### Tools To Add ##### +Tools_File configfiles/middleman/ToolsConfig # list of tools to run and their config files + +##### Run Type ##### +Inline 0 # number of Execute steps in program, -1 infinite loop that is ended by user +Interactive 0 # set to 1 if you want to run the code interactively +Remote 1 # set to 1 if you want to run the code remotely + diff --git a/configfiles/middleman/ToolsConfig b/configfiles/middleman/ToolsConfig new file mode 100644 index 0000000..8072297 --- /dev/null +++ b/configfiles/middleman/ToolsConfig @@ -0,0 +1,13 @@ +loggingReceiver MulticastReceiverSender configfiles/middleman/LoggingReceiveSenderConfig +monitoringReceiver MulticastReceiverSender configfiles/middleman/MonitoringReceiveSenderConfig +writeReceiver WriteQueryReceiver configfiles/middleman/WriteQueryReceiverConfig +readReceiver ReadQueryReceiverReplySender configfiles/middleman/ReadQueryReceiverReplySenderConfig +multicastWorkers MulticastWorkers configfiles/middleman/MulticastWorkersConfig +writeWorkers WriteWorkers configfiles/middleman/WriteWorkersConfig +databaseWorkers DatabaseWorkers configfiles/middleman/DatabaseWorkersConfig +resultWorkers ResultWorkers configfiles/middleman/ResultWorkersConfig +jobManager JobManager configfiles/middleman/JobManagerConfig +socketManager SocketManager configfiles/middleman/SocketManagerConfig +monitoring Monitoring configfiles/middleman/MonitoringConfig +sleep Sleep configfiles/middleman/SleepConfig + diff --git a/configfiles/middleman/WriteQueryReceiverConfig b/configfiles/middleman/WriteQueryReceiverConfig new file mode 100644 index 
0000000..0921e2b --- /dev/null +++ b/configfiles/middleman/WriteQueryReceiverConfig @@ -0,0 +1,9 @@ +verbose 2 +port_name db_write +rcv_hwm 10000 +conns_backlog 30000 +rcv_timeout_ms 10 +poll_timeout_ms 10 +local_buffer_size 200 +transfer_period_ms 200 +#am_master 1 diff --git a/configfiles/middleman/WriteWorkersConfig b/configfiles/middleman/WriteWorkersConfig new file mode 100644 index 0000000..caf332a --- /dev/null +++ b/configfiles/middleman/WriteWorkersConfig @@ -0,0 +1,2 @@ +verbose 2 + diff --git a/configfiles/template/ToolChainConfig b/configfiles/template/ToolChainConfig index 0c08a37..badb1c3 100644 --- a/configfiles/template/ToolChainConfig +++ b/configfiles/template/ToolChainConfig @@ -1,30 +1,58 @@ #ToolChain dynamic setup file ##### Runtime Paramiters ##### -verbose 1 ## Verbosity level of ToolChain -error_level 0 # 0= do not exit, 1= exit on unhandeled errors only, 2= exit on unhandeled errors and handeled errors -attempt_recover 1 ## 1= will attempt to finalise if an execute fails -remote_port 24002 -IO_Threads 1 ## Number of threads for network traffic (~ 1/Gbps) +UUID_path ./UUID # UUID_path for fixed UUID, if doesnt exist file will be generated. 
Remove option for always random +verbose 1 # Verbosity level of ToolChain +error_level 0 # 0= do not exit, 1= exit on unhandeled errors only, 2= exit on unhandeled errors and handeled errors +attempt_recover 1 # 1= will attempt to finalise if an execute fails, 0= will not +remote_port 24002 # port to open for remote commands if running in remote mode +IO_Threads 1 # Number of threads for network traffic (~ 1/Gbps) +alerts_send 1 # enable ability to send global alerts +alert_send_port 12242 # port to send global alerts +alerts_receive 1 # enable ability to receive global alerts +alert_receive_port 12243 # port to receive global alerts +sc_port 60000 # port for slow control ###### Logging ##### -log_mode Interactive # Interactive=cout , Remote= remote logging system "serservice_name Remote_Logging" , Local = local file log; -log_local_path ./log -log_service LogStore -log_port 24010 +log_interactive 1 # Interactive=cout; 0=false, 1= true +log_local 0 # Local = local file log; 0=false, 1= true +log_local_path ./log # file to store logs to if local is active +log_remote 0 # Remote= remote logging system "serservice_name Remote_Logging"; 0=false, 1= true +log_address 239.192.1.2 # Remote multicast address to send logs +log_port 5000 # port on remote machine to connect to +log_append_time 0 # append seconds since epoch to filename; 0=false, 1= true +log_split_files 0 # seperate output and error log files (named x.o and x.e) ###### Service discovery ##### Ignore these settings for local analysis -service_discovery_address 239.192.1.1 -service_discovery_port 5000 -service_name ToolDAQ_Service -service_publish_sec 5 -service_kick_sec 60 +service_discovery_address 239.192.1.1 # multicast address to use for service discovery +service_discovery_port 5000 # port to use for service discovery +service_name ToolDAQ_Service # name of Toolchain service to braodcast +service_publish_sec 5 # heartbeat send period +service_kick_sec 60 # remove hosts with no heartbeat after given period 
+ +###### Backend Services ##### NEWLY ADDED NEEDS TO HAVE PROPER DESCRIPTIONS AND SOME PRUNING BEFORE RELEASE +use_backend_services 1 # +db_name daq # +verbosity 1 # +max_retries 3 # +advertise_endpoints 0 # +resend_period_ms 1000 # +print_stats_period_ms 1000 # +clt_pub_port 55556 # +clt_dlr_port 55555 # +clt_pub_socket_timeout 500 # +clt_dlr_socket_timeout 500 # +inpoll_timeout 50 # keep these short! +outpoll_timeout 50 # keep these short! +command_timeout 2000 # +mon_port 5000 # +mon_address 239.192.1.3 # ##### Tools To Add ##### -Tools_File configfiles/ToolsConfig ## list of tools to run and their config files +Tools_File configfiles/ToolsConfig # list of tools to run and their config files ##### Run Type ##### -Inline 1 ## number of Execute steps in program, -1 infinite loop that is ended by user -Interactive 0 ## set to 1 if you want to run the code interactively -Remote 0 ## set to 1 if you want to run the code remotely +Inline 1 # number of Execute steps in program, -1 infinite loop that is ended by user +Interactive 0 # set to 1 if you want to run the code interactively +Remote 0 # set to 1 if you want to run the code remotely diff --git a/src/main.cpp b/src/main.cpp index 8529b78..6afda9e 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,6 +1,9 @@ #include -#include "ToolChain.h" -#include "DummyTool.h" +#include "ToolDAQChain.h" +#include "DataModel.h" +//#include "DummyTool.h" + +using namespace ToolFramework; int main(int argc, char* argv[]){ @@ -8,7 +11,9 @@ int main(int argc, char* argv[]){ if (argc==1)conffile="configfiles/Dummy/ToolChainConfig"; else conffile=argv[1]; - ToolChain tools(conffile, argc, argv); + DataModel* data_model = new DataModel(); + ToolDAQChain tools(conffile, data_model, argc, argv); + //DummyTool dummytool;