NASA Astrobee Robot Software  0.19.1
Flight software for the Astrobee robots operating inside the International Space Station.
cpu_mem_monitor.h
Go to the documentation of this file.
1 /* Copyright (c) 2017, United States Government, as represented by the
2  * Administrator of the National Aeronautics and Space Administration.
3  *
4  * All rights reserved.
5  *
6  * The Astrobee platform is licensed under the Apache License, Version 2.0
7  * (the "License"); you may not use this file except in compliance with the
8  * License. You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
14  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
15  * License for the specific language governing permissions and limitations
16  * under the License.
17  */
18 
19 
20 #ifndef CPU_MEM_MONITOR_CPU_MEM_MONITOR_H_
21 #define CPU_MEM_MONITOR_CPU_MEM_MONITOR_H_
22 
23 #include <pluginlib/class_list_macros.h>
24 
25 #include <ros/ros.h>
26 #include <ros/package.h>
27 
28 #include <inttypes.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <time.h>
33 
34 #include <sys/types.h>
35 #include <sys/sysinfo.h>
36 #include <shared_mutex>
37 
39 #include <cpu_mem_monitor/cpu.h>
40 #include <ff_msgs/CpuState.h>
41 #include <ff_msgs/CpuStateStamped.h>
42 #include <ff_msgs/MemState.h>
43 #include <ff_msgs/MemStateStamped.h>
44 #include <ff_common/ff_names.h>
45 #include <ff_util/ff_nodelet.h>
46 
47 #include <cstdint>
48 #include <string>
49 #include <vector>
50 #include <map>
51 
52 namespace cpu_mem_monitor {
53 
59 };
60 
62  public:
66  CpuMemMonitor();
71 
72  protected:
73  virtual void Initialize(ros::NodeHandle *nh);
74  bool ReadParams();
75 
76  private:
77  // Get the PIDs of the nodes to monitor
78  void GetPIDs();
79 
80  // Assert CPU loads and report if too high
81  void AssertCPULoadHighFaultCallback(ros::TimerEvent const& te);
82  void ClearCPULoadHighFaultCallback(ros::TimerEvent const& te);
83 
84  // Assert Memory loads and report if too high
85  void AssertMemLoadHighFaultCallback(ros::TimerEvent const& te);
86  void ClearMemLoadHighFaultCallback(ros::TimerEvent const& te);
87 
88  // Assert the collected CPU and Memory status
89  void AssertCpuStats();
90  void AssertMemStats();
91 
92  // Collect usage stats about all the CPUs and memory, calculate
93  // percentages based on the last time this was called. You should
94  // call this in regular intervals for the numbers to make sense over time.
95  int CollectCPUStats();
96  int CollectMemStats();
97 
98  // Callback to scan and publish the CPU and Memory stats
99  void PublishStatsCallback(ros::TimerEvent const &te);
100 
101  private:
102  struct Load {
103  std::uint64_t total_time,
104  user_time,
105  system_time,
106  idle_time,
107  nice_time,
108  io_time,
109  irq_time,
110  soft_irq_time,
111  steal_time,
112  guest_time,
113  system_all_time,
114  idle_all_time;
115 
116  double total_percentage,
117  nice_percentage,
118  user_percentage,
119  system_percentage,
120  virt_percentage;
121  };
122 
123  std::shared_timed_mutex pid_lock_;
124 
125  config_reader::ConfigReader config_params_;
126  ros::Publisher cpu_state_pub_; // Cpu stats publisher
127  ros::Publisher mem_state_pub_; // Memory stats publisher
128  ros::Timer reload_params_timer_; // Ckeck if parameters were updated
129  ros::Timer stats_timer_; // Update stats
130  ros::Timer assert_cpu_load_fault_timer_; // Check cpu load limits
131  ros::Timer clear_cpu_load_fault_timer_; // Clear cpu fault timer
132  ros::Timer assert_mem_load_fault_timer_; // Check memory load limits
133  ros::Timer clear_mem_load_fault_timer_; // Clear memory fault timer
134  int pub_queue_size_; // Monitor publishing queue size
135  double update_freq_hz_; // Publishing update frequency
136  struct sysinfo mem_info_; // Scope memory info from sysinfo
137 
138  unsigned int ncpus_; // Number of cpu's
139  std::string processor_name_; // Processor running this monitor (mlp,llp)
140 
141  std::string monitor_host_; // Either mlp, llp, hlp
142 
143  std::map<std::string, int> nodes_pid_; // Map PID values
144  std::map<std::string, uint64_t> nodes_proc_time_; // Map proc time values
145 
146  // CPU Monitor Parameters
147  bool temp_fault_triggered_; // Temperature fault triggered
148  Cpu freq_cpus_;
149  double temperature_scale_; // Scale of temperature
150  float avg_cpu_load_high_value_;
151  std::vector<Load> load_cpus_;
152  int cpu_avg_load_limit_, cpu_temp_limit_;
153 
154  // Memory Monitor Parameters
155  float mem_load_value_, avg_mem_load_high_value_, mem_load_limit_;
156 
157 
158  // Status messages to publish
159  ff_msgs::CpuStateStamped cpu_state_msg_;
160  ff_msgs::MemStateStamped mem_state_msg_;
161 
162  // Asserting faults
163  LoadFaultState load_fault_state_;
164  int assert_load_high_fault_timeout_sec_, clear_load_high_fault_timeout_sec_;
165 };
166 
167 
168 int ParseLine(char* line) {
169  // This assumes that a digit will be found and the line ends in " Kb".
170  int i = strlen(line);
171  const char* p = line;
172  for (; (*p < '0' || *p > '9') && *p != '\0'; ++p) {}
173  line[i - 3] = '\0';
174  i = atoi(p);
175  return i;
176 }
177 
178 std::string getHostfromURI(std::string uri) {
179  std::size_t uri_begin = uri.find_first_of("/");
180  std::size_t uri_end = uri.find_last_of(":");
181  if (std::string::npos != uri_begin && std::string::npos != uri_end &&
182  uri_begin <= uri_end) {
183  uri.erase(uri.begin() + uri_end, uri.end());
184  uri.erase(uri.begin(), uri.begin() + uri_begin + 2);
185  return uri;
186  } else {
187  ROS_ERROR_STREAM("Invalid URI, returning ");
188  return {};
189  }
190 }
191 
192 } // namespace cpu_mem_monitor
193 
194 #endif // CPU_MEM_MONITOR_CPU_MEM_MONITOR_H_
msg_conversions::Load
void Load(config_reader::ConfigReader &config, float &val, const std::string &config_name, const std::string &prefix="")
Definition: msg_conversions.cc:361
cpu_mem_monitor::CpuMemMonitor::ReadParams
bool ReadParams()
Definition: cpu_mem_monitor.cc:156
cpu_mem_monitor::CLEARING
@ CLEARING
Definition: cpu_mem_monitor.h:57
cpu_mem_monitor::CpuMemMonitor::Initialize
virtual void Initialize(ros::NodeHandle *nh)
Definition: cpu_mem_monitor.cc:30
ff_nodelet.h
cpu_mem_monitor
Definition: cpu.h:25
cpu_mem_monitor::CpuMemMonitor::~CpuMemMonitor
~CpuMemMonitor()
Definition: cpu_mem_monitor.cc:27
time.h
cpu_mem_monitor::CLEARED
@ CLEARED
Definition: cpu_mem_monitor.h:58
ff_util::FreeFlyerNodelet
Definition: ff_nodelet.h:57
ff_names.h
cpu_mem_monitor::ASSERTING
@ ASSERTING
Definition: cpu_mem_monitor.h:55
cpu_mem_monitor::Cpu
Definition: cpu.h:64
cpu.h
cpu_mem_monitor::getHostfromURI
std::string getHostfromURI(std::string uri)
Definition: cpu_mem_monitor.h:178
cpu_mem_monitor::CpuMemMonitor::CpuMemMonitor
CpuMemMonitor()
Definition: cpu_mem_monitor.cc:13
cpu_mem_monitor::ASSERTED
@ ASSERTED
Definition: cpu_mem_monitor.h:56
config_reader::ConfigReader
Definition: config_reader.h:48
config_reader.h
cpu_mem_monitor::LoadFaultState
LoadFaultState
Definition: cpu_mem_monitor.h:54
cpu_mem_monitor::CpuMemMonitor
Definition: cpu_mem_monitor.h:61
cpu_mem_monitor::ParseLine
int ParseLine(char *line)
Definition: cpu_mem_monitor.h:168