Kea 2.0.1
ha_service.cc
Go to the documentation of this file.
1// Copyright (C) 2018-2021 Internet Systems Consortium, Inc. ("ISC")
2//
3// This Source Code Form is subject to the terms of the Mozilla Public
4// License, v. 2.0. If a copy of the MPL was not distributed with this
5// file, You can obtain one at http://mozilla.org/MPL/2.0/.
6
7#include <config.h>
8
9#include <command_creator.h>
10#include <ha_log.h>
11#include <ha_service.h>
12#include <ha_service_states.h>
14#include <cc/data.h>
15#include <config/timeouts.h>
16#include <dhcp/iface_mgr.h>
17#include <dhcpsrv/cfgmgr.h>
18#include <dhcpsrv/lease_mgr.h>
21#include <http/date_time.h>
22#include <http/response_json.h>
25#include <util/stopwatch.h>
26#include <boost/pointer_cast.hpp>
27#include <boost/make_shared.hpp>
28#include <boost/weak_ptr.hpp>
29#include <functional>
30#include <sstream>
31
32using namespace isc::asiolink;
33using namespace isc::config;
34using namespace isc::data;
35using namespace isc::dhcp;
36using namespace isc::hooks;
37using namespace isc::http;
38using namespace isc::log;
39using namespace isc::util;
40namespace ph = std::placeholders;
41
42namespace {
43
45class CommandUnsupportedError : public CtrlChannelError {
46public:
47 CommandUnsupportedError(const char* file, size_t line, const char* what) :
48 CtrlChannelError(file, line, what) {}
49};
50
51}
52
53namespace isc {
54namespace ha {
55
65
// Constructs the HA service.
//
// Visible behaviour:
//  - stores the IO service, network state, configuration and server type and
//    value-initializes the remaining members; the lease update backlog is
//    sized with config->getDelayedUpdatesLimit();
//  - resets the network state for the HA_COMMAND origin;
//  - creates the HTTP client: a client constructed with a thread count of 0
//    when multi-threading is disabled, otherwise a client configured with
//    config_->getHttpClientThreads();
//  - in multi-threaded mode with a dedicated listener enabled, creates a
//    CmdHttpListener on the address and port taken from this server's URL,
//    using config_->getHttpListenerThreads() threads.
//
// Parameters:
//  io_service     IO service passed to the HTTP client.
//  network_state  object used to reset/enable/disable the DHCP service.
//  config         HA configuration.
//  server_type    DHCPv4 or DHCPv6 selector.
//
// Throws Unexpected (via isc_throw) when the host part of this server's URL
// cannot be converted to an IOAddress.
//
// NOTE(review): this listing has lost several statements (the bodies of the
// server_type branches, the beginning of the MT client creation and the
// beginning of the final logging statement). The gaps are marked below.
66HAService::HAService(const IOServicePtr& io_service, const NetworkStatePtr& network_state,
67 const HAConfigPtr& config, const HAServerType& server_type)
68 : io_service_(io_service), network_state_(network_state), config_(config),
69 server_type_(server_type), client_(), listener_(), communication_state_(),
70 query_filter_(config), mutex_(), pending_requests_(),
71 lease_update_backlog_(config->getDelayedUpdatesLimit()),
72 sync_complete_notified_(false) {
73
// NOTE(review): the statements inside both branches below are not visible
// in this listing; presumably they set up communication_state_ per protocol
// family - confirm against the real source.
74 if (server_type == HAServerType::DHCPv4) {
76
77 } else {
79 }
80
81 network_state_->reset(NetworkState::Origin::HA_COMMAND);
82
84
85 // Create the client and(or) listener as appropriate.
86 if (!config_->getEnableMultiThreading()) {
87 // Not configured for multi-threading, start a client in ST mode.
88 client_.reset(new HttpClient(*io_service_, 0));
89 } else {
90 // Create an MT-mode client.
// NOTE(review): the first line of this statement is missing from the
// listing; only its trailing arguments are visible.
92 config_->getHttpClientThreads(), true));
93
94 // If we're configured to use our own listener create and start it.
95 if (config_->getHttpDedicatedListener()) {
96 // Get the server address and port from this server's URL.
97 auto my_url = config_->getThisServerConfig()->getUrl();
98 IOAddress server_address(IOAddress::IPV4_ZERO_ADDRESS());
99 try {
100 // Since we do not currently support hostname resolution,
101 // we need to make sure we have an IP address here.
102 server_address = IOAddress(my_url.getStrippedHostname());
// NOTE(review): 'ex' is never referenced in the handler; the original
// failure text is dropped from the rethrown message.
103 } catch (const std::exception& ex) {
104 isc_throw(Unexpected, "server Url:" << my_url.getStrippedHostname()
105 << " is not a valid IP address");
106 }
107
108 // Fetch how many threads the listener will use.
109 uint32_t listener_threads = config_->getHttpListenerThreads();
110
111 // Instantiate the listener.
112 listener_.reset(new CmdHttpListener(server_address, my_url.getPort(),
113 listener_threads));
114 }
115 }
116
// NOTE(review): the head of the statement these .arg() calls belong to is
// not visible; its arguments are the HA mode and this server's role, both
// converted to text.
118 .arg(HAConfig::HAModeToString(config->getHAMode()))
119 .arg(HAConfig::PeerConfig::roleToString(config->getThisServerConfig()->getRole()));
120}
121
123 // Stop client and/or listener.
125
126 network_state_->reset(NetworkState::Origin::HA_COMMAND);
127}
128
129void
131 StateModel::defineEvents();
132
133 defineEvent(HA_HEARTBEAT_COMPLETE_EVT, "HA_HEARTBEAT_COMPLETE_EVT");
134 defineEvent(HA_LEASE_UPDATES_COMPLETE_EVT, "HA_LEASE_UPDATES_COMPLETE_EVT");
135 defineEvent(HA_SYNCING_FAILED_EVT, "HA_SYNCING_FAILED_EVT");
136 defineEvent(HA_SYNCING_SUCCEEDED_EVT, "HA_SYNCING_SUCCEEDED_EVT");
137 defineEvent(HA_MAINTENANCE_NOTIFY_EVT, "HA_MAINTENANCE_NOTIFY_EVT");
138 defineEvent(HA_MAINTENANCE_START_EVT, "HA_MAINTENANCE_START_EVT");
139 defineEvent(HA_MAINTENANCE_CANCEL_EVT, "HA_MAINTENANCE_CANCEL_EVT");
140 defineEvent(HA_SYNCED_PARTNER_UNAVAILABLE_EVT, "HA_SYNCED_PARTNER_UNAVAILABLE_EVT");
141}
142
143void
145 StateModel::verifyEvents();
146
155}
156
157void
159 StateModel::defineStates();
160
162 std::bind(&HAService::backupStateHandler, this),
163 config_->getStateMachineConfig()->getStateConfig(HA_BACKUP_ST)->getPausing());
164
167 config_->getStateMachineConfig()->getStateConfig(HA_COMMUNICATION_RECOVERY_ST)->getPausing());
168
170 std::bind(&HAService::normalStateHandler, this),
171 config_->getStateMachineConfig()->getStateConfig(HA_HOT_STANDBY_ST)->getPausing());
172
174 std::bind(&HAService::normalStateHandler, this),
175 config_->getStateMachineConfig()->getStateConfig(HA_LOAD_BALANCING_ST)->getPausing());
176
178 std::bind(&HAService::inMaintenanceStateHandler, this),
179 config_->getStateMachineConfig()->getStateConfig(HA_IN_MAINTENANCE_ST)->getPausing());
180
182 std::bind(&HAService::partnerDownStateHandler, this),
183 config_->getStateMachineConfig()->getStateConfig(HA_PARTNER_DOWN_ST)->getPausing());
184
187 config_->getStateMachineConfig()->getStateConfig(HA_PARTNER_IN_MAINTENANCE_ST)->getPausing());
188
190 std::bind(&HAService::passiveBackupStateHandler, this),
191 config_->getStateMachineConfig()->getStateConfig(HA_PASSIVE_BACKUP_ST)->getPausing());
192
194 std::bind(&HAService::readyStateHandler, this),
195 config_->getStateMachineConfig()->getStateConfig(HA_READY_ST)->getPausing());
196
198 std::bind(&HAService::syncingStateHandler, this),
199 config_->getStateMachineConfig()->getStateConfig(HA_SYNCING_ST)->getPausing());
200
202 std::bind(&HAService::terminatedStateHandler, this),
203 config_->getStateMachineConfig()->getStateConfig(HA_TERMINATED_ST)->getPausing());
204
206 std::bind(&HAService::waitingStateHandler, this),
207 config_->getStateMachineConfig()->getStateConfig(HA_WAITING_ST)->getPausing());
208}
209
210void
212 if (doOnEntry()) {
215
216 // Log if the state machine is paused.
218 }
219
220 // There is nothing to do in that state. This server simply receives
221 // lease updates from the partners.
223}
224
225void
227 if (doOnEntry()) {
230
231 // Log if the state machine is paused.
233 }
234
236
239
240 // Check if the clock skew is still acceptable. If not, transition to
241 // the terminated state.
242 } else if (shouldTerminate()) {
244
245 } else if (isPartnerStateInvalid()) {
247
248 } else {
249
250 // Transitions based on the partner's state.
251 switch (communication_state_->getPartnerState()) {
254 break;
255
258 break;
259
262 break;
263
264 case HA_TERMINATED_ST:
266 break;
267
269 if (shouldPartnerDown()) {
271
272 } else {
274 }
275 break;
276
277 case HA_WAITING_ST:
278 case HA_SYNCING_ST:
279 case HA_READY_ST:
280 // The partner seems to be waking up, perhaps after communication-recovery.
281 // If our backlog queue is overflown we need to synchronize our lease database.
282 // There is no need to send ha-reset to the partner because the partner is
283 // already synchronizing its lease database.
284 if (!communication_state_->isCommunicationInterrupted() &&
287 } else {
288 // Backlog was not overflown, so there is no need to synchronize our
289 // lease database. Let's wait until our partner completes synchronization
290 // and transitions to the load-balancing state.
292 }
293 break;
294
295 default:
296 // If the communication is still interrupted, let's continue sitting
297 // in this state until it is resumed or until the transition to the
298 // partner-down state, depending on what happens first.
299 if (communication_state_->isCommunicationInterrupted()) {
301 break;
302 }
303
304 // The communication has been resumed. The partner server must be in a state
305 // in which it can receive outstanding lease updates we collected. The number of
306 // outstanding lease updates must not exceed the configured limit. Finally, the
307 // lease updates must be successfully sent. If that all works, we will transition
308 // to the normal operation.
309 if ((communication_state_->getPartnerState() == getNormalState()) ||
310 (communication_state_->getPartnerState() == HA_COMMUNICATION_RECOVERY_ST)) {
312 // If our lease backlog was overflown or we were unable to send lease
313 // updates to the partner we should notify the partner that it should
314 // synchronize the lease database. We do it by sending ha-reset command.
315 if (sendHAReset()) {
317 }
318 break;
319 }
320 // The backlog was not overflown and we successfully sent our lease updates.
321 // We can now transition to the normal operation state. If the partner
322 // fails to send his outstanding lease updates to us it should send the
323 // ha-reset command to us.
325 break;
326 }
327
328 // The partner appears to be in unexpected state, we have exceeded the number
329 // of lease updates in a backlog or an attempt to send lease updates failed.
330 // In all these cases we follow plan B and transition to the waiting state.
331 // The server will then attempt to synchronize the entire lease database.
333 }
334 }
335
336 // When exiting this state we must ensure that lease updates backlog is cleared.
337 if (doOnExit()) {
339 }
340}
341
342void
344 // If we are transitioning from another state, we have to define new
345 // serving scopes appropriate for the new state. We don't do it if
346 // we remain in this state.
347 if (doOnEntry()) {
350
351 // Log if the state machine is paused.
353 }
354
356
359 return;
360 }
361
362 // Check if the clock skew is still acceptable. If not, transition to
363 // the terminated state.
364 if (shouldTerminate()) {
366 return;
367 }
368
369 // Check if the partner state is valid per current configuration. If it is
370 // in an invalid state let's transition to the waiting state and stay there
371 // until the configuration is corrected.
372 if (isPartnerStateInvalid()) {
374 return;
375 }
376
377 switch (communication_state_->getPartnerState()) {
380 break;
381
384 break;
385
388 break;
389
390 case HA_TERMINATED_ST:
392 break;
393
395 if (shouldPartnerDown()) {
397
398 } else if (config_->amAllowingCommRecovery()) {
400
401 } else {
403 }
404 break;
405
406 default:
408 }
409
410 if (doOnExit()) {
411 // Do nothing here but doOnExit() call clears the "on exit" flag
412 // when transitioning to the communication-recovery state. In that
413 // state we need this flag to be cleared.
414 }
415}
416
417void
419 // If we are transitioning from another state, we have to define new
420 // serving scopes appropriate for the new state. We don't do it if
421 // we remain in this state.
422 if (doOnEntry()) {
423 // In this state the server remains silent and waits for being
424 // shutdown.
427
428 // Log if the state machine is paused.
430
432 }
433
435
436 // We don't transition out of this state unless explicitly mandated
437 // by the administrator via a dedicated command which cancels
438 // the maintenance.
440}
441
442void
444 // If we are transitioning from another state, we have to define new
445 // serving scopes appropriate for the new state. We don't do it if
446 // we remain in this state.
447 if (doOnEntry()) {
448
449 bool maintenance = (getLastEvent() == HA_MAINTENANCE_START_EVT);
450
451 // It may be administratively disabled to handle partner's scope
452 // in case of failure. If this is the case we'll just handle our
453 // default scope (or no scope at all). The user will need to
454 // manually enable this server to handle partner's scope.
455 // If we're in the maintenance mode we serve all scopes because
456 // it is not a failover situation.
457 if (maintenance || config_->getThisServerConfig()->isAutoFailover()) {
459 } else {
461 }
463
464 // Log if the state machine is paused.
466
467 if (maintenance) {
468 // If we ended up in the partner-down state as a result of
469 // receiving the ha-maintenance-start command let's log it.
471 }
472
474 // Partner sent the ha-sync-complete-notify command to indicate that
475 // it has successfully synchronized its lease database but this server
476 // was unable to send heartbeat to this server. Enable the DHCP service
477 // and continue serving the clients in the partner-down state until the
478 // communication with the partner is fixed.
480 }
481
483
486 return;
487 }
488
489 // Check if the clock skew is still acceptable. If not, transition to
490 // the terminated state.
491 if (shouldTerminate()) {
493 return;
494 }
495
496 // Check if the partner state is valid per current configuration. If it is
497 // in an invalid state let's transition to the waiting state and stay there
498 // until the configuration is corrected.
499 if (isPartnerStateInvalid()) {
501 return;
502 }
503
504 switch (communication_state_->getPartnerState()) {
509 break;
510
511 case HA_READY_ST:
512 // If partner allocated new leases for which it didn't send lease updates
513 // to us we should synchronize our database.
514 if (communication_state_->hasPartnerNewUnsentUpdates()) {
516 } else {
517 // We did not miss any lease updates. There is no need to synchronize
518 // the database.
520 }
521 break;
522
523 case HA_TERMINATED_ST:
525 break;
526
527 default:
529 }
530}
531
532void
534 // If we are transitioning from another state, we have to define new
535 // serving scopes appropriate for the new state. We don't do it if
536 // we remain in this state.
537 if (doOnEntry()) {
539
541
542 // Log if the state machine is paused.
544
546 }
547
549
550 if (isModelPaused()) {
552 return;
553 }
554
555 // Check if the clock skew is still acceptable. If not, transition to
556 // the terminated state.
557 if (shouldTerminate()) {
559 return;
560 }
561
562 switch (communication_state_->getPartnerState()) {
565 break;
566 default:
568 }
569}
570
571void
573 // If we are transitioning from another state, we have to define new
574 // serving scopes appropriate for the new state. We don't do it if
575 // we remain in this state.
576 if (doOnEntry()) {
579
580 // In the passive-backup state we don't send heartbeat.
581 communication_state_->stopHeartbeat();
582
583 // Log if the state machine is paused.
585 }
587}
588
589void
591 // If we are transitioning from another state, we have to define new
592 // serving scopes appropriate for the new state. We don't do it if
593 // we remain in this state.
594 if (doOnEntry()) {
597
598 // Log if the state machine is paused.
600 }
601
603
606 return;
607 }
608
609 // Check if the clock skew is still acceptable. If not, transition to
610 // the terminated state.
611 if (shouldTerminate()) {
613 return;
614 }
615
616 // Check if the partner state is valid per current configuration. If it is
617 // in an invalid state let's transition to the waiting state and stay there
618 // until the configuration is corrected.
619 if (isPartnerStateInvalid()) {
621 return;
622 }
623
624 switch (communication_state_->getPartnerState()) {
629 break;
630
633 break;
634
637 break;
638
639 case HA_READY_ST:
640 // If both servers are ready, the primary server "wins" and is
641 // transitioned first.
642 if (config_->getThisServerConfig()->getRole() == HAConfig::PeerConfig::PRIMARY) {
645 } else {
647 }
648 break;
649
650 case HA_TERMINATED_ST:
652 break;
653
655 if (shouldPartnerDown()) {
657
658 } else {
660 }
661 break;
662
663 default:
665 }
666}
667
668void
670 // If we are transitioning from another state, we have to define new
671 // serving scopes appropriate for the new state. We don't do it if
672 // we remain in this state.
673 if (doOnEntry()) {
676
677 // Log if the state machine is paused.
679 }
680
683 return;
684 }
685
686 // Check if the clock skew is still acceptable. If not, transition to
687 // the terminated state.
688 if (shouldTerminate()) {
690 return;
691 }
692
693 // Check if the partner state is valid per current configuration. If it is
694 // in an invalid state let's transition to the waiting state and stay there
695 // until the configuration is corrected.
696 if (isPartnerStateInvalid()) {
698 return;
699 }
700
701 // We don't want to perform synchronous attempt to synchronize with
702 // a partner until we know that the partner is responding. Therefore,
703 // we wait for the heartbeat to complete successfully before we
704 // initiate the synchronization.
705 switch (communication_state_->getPartnerState()) {
706 case HA_TERMINATED_ST:
708 return;
709
711 // If the partner appears to be offline, let's transition to the partner
712 // down state. Otherwise, we'd be stuck trying to synchronize with a
713 // dead partner.
714 if (shouldPartnerDown()) {
716
717 } else {
719 }
720 break;
721
722 default:
723 // We don't want the heartbeat to interfere with the synchronization,
724 // so let's temporarily stop it.
725 communication_state_->stopHeartbeat();
726
727 // Timeout is configured in milliseconds. Need to convert to seconds.
728 unsigned int dhcp_disable_timeout =
729 static_cast<unsigned int>(config_->getSyncTimeout() / 1000);
730 if (dhcp_disable_timeout == 0) {
731 ++dhcp_disable_timeout;
732 }
733
734 // Perform synchronous leases update.
735 std::string status_message;
736 int sync_status = synchronize(status_message,
737 config_->getFailoverPeerConfig()->getName(),
738 dhcp_disable_timeout);
739
740 // If the leases synchronization was successful, let's transition
741 // to the ready state.
742 if (sync_status == CONTROL_RESULT_SUCCESS) {
744
745 } else {
746 // If the synchronization was unsuccessful we're back to the
747 // situation that the partner is unavailable and therefore
748 // we stay in the syncing state.
750 }
751 }
752
753 // Make sure that the heartbeat is re-enabled.
755}
756
757void
759 // If we are transitioning from another state, we have to define new
760 // serving scopes appropriate for the new state. We don't do it if
761 // we remain in this state.
762 if (doOnEntry()) {
765
766 // In the terminated state we don't send heartbeat.
767 communication_state_->stopHeartbeat();
768
769 // Log if the state machine is paused.
771
773 }
774
776}
777
778void
780 // If we are transitioning from another state, we have to define new
781 // serving scopes appropriate for the new state. We don't do it if
782 // we remain in this state.
783 if (doOnEntry()) {
786
787 // Log if the state machine is paused.
789 }
790
791 // Only schedule the heartbeat for non-backup servers.
792 if ((config_->getHAMode() != HAConfig::PASSIVE_BACKUP) &&
793 (config_->getThisServerConfig()->getRole() != HAConfig::PeerConfig::BACKUP)) {
795 }
796
799 return;
800 }
801
802 // Backup server must remain in its own state.
803 if (config_->getThisServerConfig()->getRole() == HAConfig::PeerConfig::BACKUP) {
805 return;
806 }
807
808 // We're not a backup server, so we're either primary or secondary. If this is
809 // a passive-backup mode of operation, we're primary and we should transition
810 // to the passive-backup state.
811 if (config_->getHAMode() == HAConfig::PASSIVE_BACKUP) {
813 return;
814 }
815
816 // Check if the clock skew is still acceptable. If not, transition to
817 // the terminated state.
818 if (shouldTerminate()) {
820 return;
821 }
822
823 // Check if the partner state is valid per current configuration. If it is
824 // in an invalid state let's sit in the waiting state until the configuration
825 // is corrected.
826 if (isPartnerStateInvalid()) {
828 return;
829 }
830
831 switch (communication_state_->getPartnerState()) {
838 case HA_READY_ST:
839 // If we're configured to not synchronize lease database, proceed directly
840 // to the "ready" state.
841 verboseTransition(config_->amSyncingLeases() ? HA_SYNCING_ST : HA_READY_ST);
842 break;
843
844 case HA_SYNCING_ST:
846 break;
847
848 case HA_TERMINATED_ST:
849 // We have checked above whether the clock skew is exceeding the threshold
850 // and we should terminate. If we're here, it means that the clock skew
851 // is acceptable. The partner may be still in the terminated state because
852 // it hasn't been restarted yet. Probably, this server is the first one
853 // being restarted after syncing the clocks. Let's just sit in the waiting
854 // state until the partner gets restarted.
857 break;
858
859 case HA_WAITING_ST:
860 // If both servers are waiting, the primary server 'wins' and is
861 // transitioned to the next state first.
862 if (config_->getThisServerConfig()->getRole() == HAConfig::PeerConfig::PRIMARY) {
863 // If we're configured to not synchronize lease database, proceed directly
864 // to the "ready" state.
865 verboseTransition(config_->amSyncingLeases() ? HA_SYNCING_ST : HA_READY_ST);
866
867 } else {
869 }
870 break;
871
873 if (shouldPartnerDown()) {
875
876 } else {
878 }
879 break;
880
881 default:
883 }
884}
885
// Transitions the state machine to the given state.
//
// The labels of the current and the target state are obtained with
// getStateLabel() and converted to upper case. When the HA mode is not
// PASSIVE_BACKUP the partner's state label is prepared the same way. The
// actual transition is performed with transition(state, getNextEvent()).
// Afterwards, for servers that are neither in the passive-backup mode nor
// configured with the BACKUP role, one of three branches is selected based
// on shouldSendLeaseUpdates() for the failover peer and on
// config_->amSendingLeaseUpdates().
//
// Parameter:
//  state  identifier of the state to transition to.
//
// NOTE(review): the heads of all statements that consume the prepared
// names via .arg() are missing from this listing; presumably they are
// logging calls - confirm against the real source.
886void
887HAService::verboseTransition(const unsigned state) {
888 // Get current and new state name.
889 std::string current_state_name = getStateLabel(getCurrState());
890 std::string new_state_name = getStateLabel(state);
891
892 // Turn them to upper case so as they are better visible in the logs.
893 boost::to_upper(current_state_name);
894 boost::to_upper(new_state_name);
895
896 if (config_->getHAMode() != HAConfig::PASSIVE_BACKUP) {
897 // If this is load-balancing or hot-standby mode we also want to log
898 // partner's state.
899 auto partner_state = communication_state_->getPartnerState();
900 std::string partner_state_name = getStateLabel(partner_state);
901 boost::to_upper(partner_state_name);
902
903 // Log the transition.
905 .arg(current_state_name)
906 .arg(new_state_name)
907 .arg(partner_state_name);
908
909 } else {
910 // In the passive-backup mode we don't know the partner's state.
912 .arg(current_state_name)
913 .arg(new_state_name);
914 }
915
916 // If we're transitioning directly from the "waiting" to "ready"
917 // state it indicates that the database synchronization is
918 // administratively disabled. Let's remind the user about this
919 // configuration setting.
// NOTE(review): the body of this condition is not visible in the listing.
920 if ((state == HA_READY_ST) && (getCurrState() == HA_WAITING_ST)) {
922 }
923
924 // Do the actual transition.
925 transition(state, getNextEvent());
926
927 // Inform the administrator whether or not lease updates are generated.
928 // Updates are never generated by a backup server so it doesn't make
929 // sense to log anything for the backup server.
930 if ((config_->getHAMode() != HAConfig::PASSIVE_BACKUP) &&
931 (config_->getThisServerConfig()->getRole() != HAConfig::PeerConfig::BACKUP)) {
932 if (shouldSendLeaseUpdates(config_->getFailoverPeerConfig())) {
934 .arg(new_state_name);
935
936 } else if (!config_->amSendingLeaseUpdates()) {
937 // Lease updates are administratively disabled.
939 .arg(new_state_name);
940
941 } else {
942 // Lease updates are not administratively disabled, but they
943 // are not issued because this is the backup server or because
944 // in this state the server should not generate lease updates.
946 .arg(new_state_name);
947 }
948 }
949}
950
951int
953 if (config_->getThisServerConfig()->getRole() == HAConfig::PeerConfig::BACKUP) {
954 return (HA_BACKUP_ST);
955 }
956
957 switch (config_->getHAMode()) {
959 return (HA_LOAD_BALANCING_ST);
961 return (HA_HOT_STANDBY_ST);
962 default:
963 return (HA_PASSIVE_BACKUP_ST);
964 }
965}
966
967bool
969 if (isModelPaused()) {
971 unpauseModel();
972 return (true);
973 }
974 return (false);
975}
976
977void
979 // Inform the administrator if the state machine is paused.
980 if (isModelPaused()) {
981 std::string state_name = stateToString(getCurrState());
982 boost::to_upper(state_name);
984 .arg(state_name);
985 }
986}
987
988void
991}
992
993bool
995 return (inScopeInternal(query4));
996}
997
998bool
1000 return (inScopeInternal(query6));
1001}
1002
1003template<typename QueryPtrType>
1004bool
1005HAService::inScopeInternal(QueryPtrType& query) {
1006 // Check if the query is in scope (should be processed by this server).
1007 std::string scope_class;
1008 const bool in_scope = query_filter_.inScope(query, scope_class);
1009 // Whether or not the query is going to be processed by this server,
1010 // we associate the query with the appropriate class.
1011 query->addClass(dhcp::ClientClass(scope_class));
1012 // The following is the part of the server failure detection algorithm.
1013 // If the query should be processed by the partner we need to check if
1014 // the partner responds. If the number of unanswered queries exceeds a
1015 // configured threshold, we will consider the partner to be offline.
1016 if (!in_scope && communication_state_->isCommunicationInterrupted()) {
1017 communication_state_->analyzeMessage(query);
1018 }
1019 // Indicate if the query is in scope.
1020 return (in_scope);
1021}
1022
1023void
1025 std::string current_state_name = getStateLabel(getCurrState());
1026 boost::to_upper(current_state_name);
1027
1028 // DHCP service should be enabled in the following states.
1029 const bool should_enable = ((getCurrState() == HA_COMMUNICATION_RECOVERY_ST) ||
1036
1037 if (!should_enable && network_state_->isServiceEnabled()) {
1038 std::string current_state_name = getStateLabel(getCurrState());
1039 boost::to_upper(current_state_name);
1041 .arg(config_->getThisServerName())
1042 .arg(current_state_name);
1043 network_state_->disableService(NetworkState::Origin::HA_COMMAND);
1044
1045 } else if (should_enable && !network_state_->isServiceEnabled()) {
1046 std::string current_state_name = getStateLabel(getCurrState());
1047 boost::to_upper(current_state_name);
1049 .arg(config_->getThisServerName())
1050 .arg(current_state_name);
1051 network_state_->enableService(NetworkState::Origin::HA_COMMAND);
1052 }
1053}
1054
1055bool
1057 // Checking whether the communication with the partner is OK is the
1058 // first step towards verifying if the server is up.
1059 if (communication_state_->isCommunicationInterrupted()) {
1060 // If the communication is interrupted, we also have to check
1061 // whether the partner answers DHCP requests. The only cases
1062 // when we don't (can't) do it are: the hot standby configuration
1063 // in which this server is a primary and when the DHCP service is
1064 // disabled so we can't analyze incoming traffic. Note that the
1065 // primary server can't check delayed responses to the partner
1066 // because the partner doesn't respond to any queries in this
1067 // configuration.
1068 if (network_state_->isServiceEnabled() &&
1069 ((config_->getHAMode() == HAConfig::LOAD_BALANCING) ||
1070 (config_->getThisServerConfig()->getRole() == HAConfig::PeerConfig::STANDBY))) {
1071 return (communication_state_->failureDetected());
1072 }
1073
1074 // Hot standby / primary case.
1075 return (true);
1076 }
1077
1078 // Shouldn't transition to the partner down state.
1079 return (false);
1080}
1081
1082bool
1084 // Check if skew is fatally large.
1085 bool should_terminate = communication_state_->clockSkewShouldTerminate();
1086
1087 // If not issue a warning if it's getting large.
1088 if (!should_terminate) {
1089 communication_state_->clockSkewShouldWarn();
1090 }
1091
1092 return (should_terminate);
1093}
1094
1095bool
1098}
1099
1100bool
1102 switch (communication_state_->getPartnerState()) {
1104 if (config_->getHAMode() != HAConfig::LOAD_BALANCING) {
1106 return (true);
1107 }
1108 break;
1109
1110 case HA_HOT_STANDBY_ST:
1111 if (config_->getHAMode() != HAConfig::HOT_STANDBY) {
1113 return (true);
1114 }
1115 break;
1116
1118 if (config_->getHAMode() != HAConfig::LOAD_BALANCING) {
1120 return (true);
1121 }
1122 break;
1123
1124 default:
1125 ;
1126 }
1127 return (false);
1128}
1129
1130size_t
1132 const dhcp::Lease4CollectionPtr& leases,
1133 const dhcp::Lease4CollectionPtr& deleted_leases,
1134 const hooks::ParkingLotHandlePtr& parking_lot) {
1135
1136 // Get configurations of the peers. Exclude this instance.
1137 HAConfig::PeerConfigMap peers_configs = config_->getOtherServersConfig();
1138
1139 size_t sent_num = 0;
1140
1141 // Schedule sending lease updates to each peer.
1142 for (auto p = peers_configs.begin(); p != peers_configs.end(); ++p) {
1143 HAConfig::PeerConfigPtr conf = p->second;
1144
1145 // Check if the lease updates should be queued. This is the case when the
1146 // server is in the communication-recovery state. Queued lease updates may
1147 // be sent when the communication is re-established.
1148 if (shouldQueueLeaseUpdates(conf)) {
1149 // Lease updates for deleted leases.
1150 for (auto l = deleted_leases->begin(); l != deleted_leases->end(); ++l) {
1152 }
1153
1154 // Lease updates for new allocations and updated leases.
1155 for (auto l = leases->begin(); l != leases->end(); ++l) {
1157 }
1158
1159 continue;
1160 }
1161
1162 // Check if the lease update should be sent to the server. If we're in
1163 // the partner-down state we don't send lease updates to the partner.
1164 if (!shouldSendLeaseUpdates(conf)) {
1165 // If we decide to not send the lease updates to an active partner, we
1166 // should make a record of it in the communication state. The partner
1167 // can check if there were any unsent lease updates when he determines
1168 // whether it should synchronize its database or not when it recovers
1169 // from the partner-down state.
1170 if (conf->getRole() != HAConfig::PeerConfig::BACKUP) {
1171 communication_state_->increaseUnsentUpdateCount();
1172 }
1173 continue;
1174 }
1175
1176 // Lease updates for deleted leases.
1177 for (auto l = deleted_leases->begin(); l != deleted_leases->end(); ++l) {
1179 parking_lot);
1180 }
1181
1182 // Lease updates for new allocations and updated leases.
1183 for (auto l = leases->begin(); l != leases->end(); ++l) {
1185 parking_lot);
1186 }
1187
1188 // If we're contacting a backup server from which we don't expect a
1189 // response prior to responding to the DHCP client we don't count
1190 // it.
1191 if ((config_->amWaitingBackupAck() || (conf->getRole() != HAConfig::PeerConfig::BACKUP))) {
1192 ++sent_num;
1193 }
1194 }
1195
1196 return (sent_num);
1197}
1198
1199size_t
1201 const dhcp::Lease6CollectionPtr& leases,
1202 const dhcp::Lease6CollectionPtr& deleted_leases,
1203 const hooks::ParkingLotHandlePtr& parking_lot) {
1204
1205 // Get configurations of the peers. Exclude this instance.
1206 HAConfig::PeerConfigMap peers_configs = config_->getOtherServersConfig();
1207
1208 size_t sent_num = 0;
1209
1210 // Schedule sending lease updates to each peer.
1211 for (auto p = peers_configs.begin(); p != peers_configs.end(); ++p) {
1212 HAConfig::PeerConfigPtr conf = p->second;
1213
1214 // Check if the lease updates should be queued. This is the case when the
1215 // server is in the communication-recovery state. Queued lease updates may
1216 // be sent when the communication is re-established.
1217 if (shouldQueueLeaseUpdates(conf)) {
1218 for (auto l = deleted_leases->begin(); l != deleted_leases->end(); ++l) {
1220 }
1221
1222 // Lease updates for new allocations and updated leases.
1223 for (auto l = leases->begin(); l != leases->end(); ++l) {
1225 }
1226
1227 continue;
1228 }
1229
1230 // Check if the lease update should be sent to the server. If we're in
1231 // the partner-down state we don't send lease updates to the partner.
1232 if (!shouldSendLeaseUpdates(conf)) {
1233 // If we decide to not send the lease updates to an active partner, we
1234 // should make a record of it in the communication state. The partner
1235 // can check if there were any unsent lease updates when he determines
1236 // whether it should synchronize its database or not when it recovers
1237 // from the partner-down state.
1238 if (conf->getRole() != HAConfig::PeerConfig::BACKUP) {
1239 communication_state_->increaseUnsentUpdateCount();
1240 }
1241 continue;
1242 }
1243
1244 // If we're contacting a backup server from which we don't expect a
1245 // response prior to responding to the DHCP client we don't count
1246 // it.
1247 if (config_->amWaitingBackupAck() || (conf->getRole() != HAConfig::PeerConfig::BACKUP)) {
1248 ++sent_num;
1249 }
1250
1251 // Send new/updated leases and deleted leases in one command.
1252 asyncSendLeaseUpdate(query, conf, CommandCreator::createLease6BulkApply(leases, deleted_leases),
1253 parking_lot);
1254 }
1255
1256 return (sent_num);
1257}
1258
1259template<typename QueryPtrType>
1260bool
1262 const ParkingLotHandlePtr& parking_lot) {
1263 if (MultiThreadingMgr::instance().getMode()) {
1264 std::lock_guard<std::mutex> lock(mutex_);
1265 return (leaseUpdateCompleteInternal(query, parking_lot));
1266 } else {
1267 return (leaseUpdateCompleteInternal(query, parking_lot));
1268 }
1269}
1270
1271template<typename QueryPtrType>
1272bool
1273HAService::leaseUpdateCompleteInternal(QueryPtrType& query,
1274 const ParkingLotHandlePtr& parking_lot) {
1275 auto it = pending_requests_.find(query);
1276
1277 // If there are no more pending requests for this query, let's unpark
1278 // the DHCP packet.
1279 if (it == pending_requests_.end() || (--pending_requests_[query] <= 0)) {
1280 parking_lot->unpark(query);
1281
1282 // If we have unparked the packet we can clear pending requests for
1283 // this query.
1284 if (it != pending_requests_.end()) {
1285 pending_requests_.erase(it);
1286 }
1287 return (true);
1288 }
1289 return (false);
1290}
1291
1292template<typename QueryPtrType>
1293void
1295 if (MultiThreadingMgr::instance().getMode()) {
1296 std::lock_guard<std::mutex> lock(mutex_);
1297 updatePendingRequestInternal(query);
1298 } else {
1299 updatePendingRequestInternal(query);
1300 }
1301}
1302
1303template<typename QueryPtrType>
1304void
1305HAService::updatePendingRequestInternal(QueryPtrType& query) {
1306 if (pending_requests_.count(query) == 0) {
1307 pending_requests_[query] = 1;
1308 } else {
1309 ++pending_requests_[query];
1310 }
1311}
1312
1313template<typename QueryPtrType>
1314void
1315HAService::asyncSendLeaseUpdate(const QueryPtrType& query,
1316 const HAConfig::PeerConfigPtr& config,
1317 const ConstElementPtr& command,
1318 const ParkingLotHandlePtr& parking_lot) {
1319 // Create HTTP/1.1 request including our command.
1320 PostHttpRequestJsonPtr request = boost::make_shared<PostHttpRequestJson>
1321 (HttpRequest::Method::HTTP_POST, "/", HttpVersion::HTTP_11(),
1322 HostHttpHeader(config->getUrl().getHostname()));
1323 config->addBasicAuthHttpHeader(request);
1324 request->setBodyAsJson(command);
1325 request->finalize();
1326
1327 // Response object should also be created because the HTTP client needs
1328 // to know the type of the expected response.
1329 HttpResponseJsonPtr response = boost::make_shared<HttpResponseJson>();
1330
1331 // When possible we prefer to pass weak pointers to the queries, rather
1332 // than shared pointers, to avoid memory leaks in case cross reference
1333 // between the pointers.
1334 boost::weak_ptr<typename QueryPtrType::element_type> weak_query(query);
1335
1336 // Schedule asynchronous HTTP request.
1337 client_->asyncSendRequest(config->getUrl(), config->getTlsContext(),
1338 request, response,
1339 [this, weak_query, parking_lot, config]
1340 (const boost::system::error_code& ec,
1341 const HttpResponsePtr& response,
1342 const std::string& error_str) {
1343 // Get the shared pointer of the query. The server should keep the
1344 // pointer to the query and then park it. Therefore, we don't really
1345 // expect it to be null. If it is null, something is really wrong.
1346 QueryPtrType query = weak_query.lock();
1347 if (!query) {
1348 isc_throw(Unexpected, "query is null while receiving response from"
1349 " HA peer. This is programmatic error");
1350 }
1351
1352 // There are three possible groups of errors during the lease update.
1353 // One is the IO error causing issues in communication with the peer.
1354 // Another one is an HTTP parsing error. The last type of error is
1355 // when non-success error code is returned in the response carried
1356 // in the HTTP message or if the JSON response is otherwise broken.
1357
1358 bool lease_update_success = true;
1359
1360 // Handle first two groups of errors.
1361 if (ec || !error_str.empty()) {
1362 LOG_WARN(ha_logger, HA_LEASE_UPDATE_COMMUNICATIONS_FAILED)
1363 .arg(query->getLabel())
1364 .arg(config->getLogLabel())
1365 .arg(ec ? ec.message() : error_str);
1366
1367 // Communication error, so let's drop parked packet. The DHCP
1368 // response will not be sent.
1369 lease_update_success = false;
1370
1371 } else {
1372
1373 // Handle third group of errors.
1374 try {
1375 int rcode = 0;
1376 auto args = verifyAsyncResponse(response, rcode);
1377 // In the v6 case the server may return a list of failed lease
1378 // updates and we should log them.
1379 logFailedLeaseUpdates(query, args);
1380
1381 } catch (const std::exception& ex) {
1383 .arg(query->getLabel())
1384 .arg(config->getLogLabel())
1385 .arg(ex.what());
1386
1387 // Error while doing an update. The DHCP response will not be sent.
1388 lease_update_success = false;
1389 }
1390 }
1391
1392 // We don't care about the result of the lease update to the backup server.
1393 // It is a best effort update.
1394 if ((config->getRole() != HAConfig::PeerConfig::BACKUP) && !lease_update_success) {
1395 // If we were unable to communicate with the partner we set partner's
1396 // state as unavailable.
1397 communication_state_->setPartnerState("unavailable");
1398 }
1399
1400 // It is possible to configure the server to not wait for a response from
1401 // the backup server before we unpark the packet and respond to the client.
1402 // Here we check if we're dealing with such situation.
1403 if (config_->amWaitingBackupAck() || (config->getRole() != HAConfig::PeerConfig::BACKUP)) {
1404 // We're expecting a response from the backup server or it is not
1405 // a backup server and the lease update was unsuccessful. In such
1406 // case the DHCP exchange fails.
1407 if (!lease_update_success) {
1408 parking_lot->drop(query);
1409 }
1410 } else {
1411 // This was a response from the backup server and we're configured to
1412 // not wait for their acknowledgments, so there is nothing more to do.
1413 return;
1414 }
1415
1416 if (leaseUpdateComplete(query, parking_lot)) {
1417 // If we have finished sending the lease updates we need to run the
1418 // state machine until the state machine finds that additional events
1419 // are required, such as next heartbeat or a lease update. The runModel()
1420 // may transition to another state, schedule asynchronous tasks etc.
1421 // Then it returns control to the DHCP server.
1422 runModel(HA_LEASE_UPDATES_COMPLETE_EVT);
1423 }
1424 },
1426 std::bind(&HAService::clientConnectHandler, this, ph::_1, ph::_2),
1427 std::bind(&HAService::clientHandshakeHandler, this, ph::_1),
1428 std::bind(&HAService::clientCloseHandler, this, ph::_1)
1429 );
1430
1431 // The number of pending requests is the number of requests for which we
1432 // expect an acknowledgment prior to responding to the DHCP clients. If
1433 // we're configured to wait for the acks from the backups or it is not
1434 // a backup increase the number of pending requests.
1435 if (config_->amWaitingBackupAck() || (config->getRole() != HAConfig::PeerConfig::BACKUP)) {
1436 // Request scheduled, so update the request counters for the query.
1437 updatePendingRequest(query);
1438 }
1439}
1440
// Tells whether a lease update should be sent to the given peer.
//
// The checks are made in this order:
// - false when sending lease updates is disabled in the configuration,
// - true when the peer is a backup server,
// - false when this server is a backup server,
// - otherwise the answer depends on the current state of the state machine;
//   states not listed in the switch yield false.
//
// NOTE(review): the embedded listing numbers jump from 1461 to 1464 inside
// the switch below, so two source lines are not visible here - presumably
// further case labels. Confirm the full list of states against the upstream
// file before relying on what is shown.
1441bool
1442HAService::shouldSendLeaseUpdates(const HAConfig::PeerConfigPtr& peer_config) const {
1443 // Never send lease updates if they are administratively disabled.
1444 if (!config_->amSendingLeaseUpdates()) {
1445 return (false);
1446 }
1447
1448 // Always send updates to the backup server.
1449 if (peer_config->getRole() == HAConfig::PeerConfig::BACKUP) {
1450 return (true);
1451 }
1452
1453 // Never send updates if this is a backup server.
1454 if (config_->getThisServerConfig()->getRole() == HAConfig::PeerConfig::BACKUP) {
1455 return (false);
1456 }
1457
1458 // In other case, whether we send lease updates or not depends on our
1459 // state.
1460 switch (getCurrState()) {
1461 case HA_HOT_STANDBY_ST:
1464 return (true);
1465
1466 default:
// Any state without a case label above falls through to the final return.
1467 ;
1468 }
1469
1470 return (false);
1471}
1472
1473bool
1474HAService::shouldQueueLeaseUpdates(const HAConfig::PeerConfigPtr& peer_config) const {
1475 if (!config_->amSendingLeaseUpdates()) {
1476 return (false);
1477 }
1478
1479 if (peer_config->getRole() == HAConfig::PeerConfig::BACKUP) {
1480 return (false);
1481 }
1482
1483 return (getCurrState() == HA_COMMUNICATION_RECOVERY_ST);
1484}
1485
1486void
1487HAService::logFailedLeaseUpdates(const PktPtr& query,
1488 const ConstElementPtr& args) const {
1489 // If there are no arguments, it means that the update was successful.
1490 if (!args || (args->getType() != Element::map)) {
1491 return;
1492 }
1493
1494 // Instead of duplicating the code between the failed-deleted-leases and
1495 // failed-leases, let's just have one function that does it for both.
1496 auto log_proc = [](const PktPtr query, const ConstElementPtr& args,
1497 const std::string& param_name, const log::MessageID& mesid) {
1498
1499 // Check if there are any failed leases.
1500 auto failed_leases = args->get(param_name);
1501
1502 // The failed leases must be a list.
1503 if (failed_leases && (failed_leases->getType() == Element::list)) {
1504 // Go over the failed leases and log each of them.
1505 for (int i = 0; i < failed_leases->size(); ++i) {
1506 auto lease = failed_leases->get(i);
1507 if (lease->getType() == Element::map) {
1508
1509 // ip-address
1510 auto ip_address = lease->get("ip-address");
1511
1512 // lease type
1513 auto lease_type = lease->get("type");
1514
1515 // error-message
1516 auto error_message = lease->get("error-message");
1517
1518 LOG_INFO(ha_logger, mesid)
1519 .arg(query->getLabel())
1520 .arg(lease_type && (lease_type->getType() == Element::string) ?
1521 lease_type->stringValue() : "(unknown)")
1522 .arg(ip_address && (ip_address->getType() == Element::string) ?
1523 ip_address->stringValue() : "(unknown)")
1524 .arg(error_message && (error_message->getType() == Element::string) ?
1525 error_message->stringValue() : "(unknown)");
1526 }
1527 }
1528 }
1529 };
1530
1531 // Process "failed-deleted-leases"
1532 log_proc(query, args, "failed-deleted-leases", HA_LEASE_UPDATE_DELETE_FAILED_ON_PEER);
1533
1534 // Process "failed-leases".
1535 log_proc(query, args, "failed-leases", HA_LEASE_UPDATE_CREATE_UPDATE_FAILED_ON_PEER);
1536}
1537
1539HAService::processStatusGet() const {
1540 ElementPtr ha_servers = Element::createMap();
1541
1542 // Local part
1543 ElementPtr local = Element::createMap();
1545 role = config_->getThisServerConfig()->getRole();
1546 std::string role_txt = HAConfig::PeerConfig::roleToString(role);
1547 local->set("role", Element::create(role_txt));
1548 int state = getCurrState();
1549 try {
1550 local->set("state", Element::create(stateToString(state)));
1551
1552 } catch (...) {
1553 // Empty string on error.
1554 local->set("state", Element::create(std::string()));
1555 }
1556 std::set<std::string> scopes = query_filter_.getServedScopes();
1557 ElementPtr list = Element::createList();
1558 for (std::string scope : scopes) {
1559 list->add(Element::create(scope));
1560 }
1561 local->set("scopes", list);
1562 ha_servers->set("local", local);
1563
1564 // Do not include remote server information if this is a backup server or
1565 // we're in the passive-backup mode.
1566 if ((config_->getHAMode() == HAConfig::PASSIVE_BACKUP) ||
1567 (config_->getThisServerConfig()->getRole() == HAConfig::PeerConfig::BACKUP)) {
1568 return (ha_servers);
1569 }
1570
1571 // Remote part
1572 ElementPtr remote = communication_state_->getReport();
1573
1574 try {
1575 role = config_->getFailoverPeerConfig()->getRole();
1576 std::string role_txt = HAConfig::PeerConfig::roleToString(role);
1577 remote->set("role", Element::create(role_txt));
1578
1579 } catch (...) {
1580 remote->set("role", Element::create(std::string()));
1581 }
1582 ha_servers->set("remote", remote);
1583
1584 return (ha_servers);
1585}
1586
1588HAService::processHeartbeat() {
1589 ElementPtr arguments = Element::createMap();
1590 std::string state_label = getState(getCurrState())->getLabel();
1591 arguments->set("state", Element::create(state_label));
1592
1593 std::string date_time = HttpDateTime().rfc1123Format();
1594 arguments->set("date-time", Element::create(date_time));
1595
1596 auto scopes = query_filter_.getServedScopes();
1597 ElementPtr scopes_list = Element::createList();
1598 for (auto scope : scopes) {
1599 scopes_list->add(Element::create(scope));
1600 }
1601 arguments->set("scopes", scopes_list);
1602
1603 arguments->set("unsent-update-count",
1604 Element::create(static_cast<int64_t>(communication_state_->getUnsentUpdateCount())));
1605
1606 return (createAnswer(CONTROL_RESULT_SUCCESS, "HA peer status returned.",
1607 arguments));
1608}
1609
1611HAService::processHAReset() {
1612 if (getCurrState() == HA_WAITING_ST) {
1613 return (createAnswer(CONTROL_RESULT_SUCCESS, "HA state machine already in WAITING state."));
1614 }
1615 verboseTransition(HA_WAITING_ST);
1616 runModel(NOP_EVT);
1617 return (createAnswer(CONTROL_RESULT_SUCCESS, "HA state machine reset."));
1618}
1619
1620void
1621HAService::asyncSendHeartbeat() {
1622 HAConfig::PeerConfigPtr partner_config = config_->getFailoverPeerConfig();
1623
1624 // If the sync_complete_notified_ is true it means that the partner
1625 // notified us that it had completed lease database synchronization.
1626 // We confirm that the partner is operational by sending the heartbeat
1627 // to it. Regardless if the partner responds to our heartbeats or not,
1628 // we should clear this flag. But, since we need the current value in
1629 // the async call handler, we save it in the local variable before
1630 // clearing it.
1631 bool sync_complete_notified = sync_complete_notified_;
1632 sync_complete_notified_ = false;
1633
1634 // Create HTTP/1.1 request including our command.
1635 PostHttpRequestJsonPtr request = boost::make_shared<PostHttpRequestJson>
1636 (HttpRequest::Method::HTTP_POST, "/", HttpVersion::HTTP_11(),
1637 HostHttpHeader(partner_config->getUrl().getHostname()));
1638 partner_config->addBasicAuthHttpHeader(request);
1639 request->setBodyAsJson(CommandCreator::createHeartbeat(server_type_));
1640 request->finalize();
1641
1642 // Response object should also be created because the HTTP client needs
1643 // to know the type of the expected response.
1644 HttpResponseJsonPtr response = boost::make_shared<HttpResponseJson>();
1645
1646 // Schedule asynchronous HTTP request.
1647 client_->asyncSendRequest(partner_config->getUrl(),
1648 partner_config->getTlsContext(),
1649 request, response,
1650 [this, partner_config, sync_complete_notified]
1651 (const boost::system::error_code& ec,
1652 const HttpResponsePtr& response,
1653 const std::string& error_str) {
1654
1655 // There are three possible groups of errors during the heartbeat.
1656 // One is the IO error causing issues in communication with the peer.
1657 // Another one is an HTTP parsing error. The last type of error is
1658 // when non-success error code is returned in the response carried
1659 // in the HTTP message or if the JSON response is otherwise broken.
1660
1661 bool heartbeat_success = true;
1662
1663 // Handle first two groups of errors.
1664 if (ec || !error_str.empty()) {
1665 LOG_WARN(ha_logger, HA_HEARTBEAT_COMMUNICATIONS_FAILED)
1666 .arg(partner_config->getLogLabel())
1667 .arg(ec ? ec.message() : error_str);
1668 heartbeat_success = false;
1669
1670 } else {
1671
1672 // Handle third group of errors.
1673 try {
1674 // Response must contain arguments and the arguments must
1675 // be a map.
1676 int rcode = 0;
1677 ConstElementPtr args = verifyAsyncResponse(response, rcode);
1678 if (!args || args->getType() != Element::map) {
1679 isc_throw(CtrlChannelError, "returned arguments in the response"
1680 " must be a map");
1681 }
1682 // Response must include partner's state.
1683 ConstElementPtr state = args->get("state");
1684 if (!state || state->getType() != Element::string) {
1685 isc_throw(CtrlChannelError, "server state not returned in response"
1686 " to a ha-heartbeat command or it is not a string");
1687 }
1688 // Remember the partner's state. This may throw if the returned
1689 // state is invalid.
1690 communication_state_->setPartnerState(state->stringValue());
1691
1692 ConstElementPtr date_time = args->get("date-time");
1693 if (!date_time || date_time->getType() != Element::string) {
1694 isc_throw(CtrlChannelError, "date-time not returned in response"
1695 " to a ha-heartbeat command or it is not a string");
1696 }
1697 // Note the time returned by the partner to calculate the clock skew.
1698 communication_state_->setPartnerTime(date_time->stringValue());
1699
1700 // Remember the scopes served by the partner.
1701 try {
1702 auto scopes = args->get("scopes");
1703 communication_state_->setPartnerScopes(scopes);
1704
1705 } catch (...) {
1706 // We don't want to fail if the scopes are missing because
1707 // this would be incompatible with old HA hook library
1708 // versions. We may make it mandatory one day, but during
1709 // upgrades of existing HA setup it would be a real issue
1710 // if we failed here.
1711 }
1712
1713 // unsent-update-count was not present in earlier HA versions.
1714 // Let's check if the partner has sent the parameter. We initialized
1715 // the counter to 0, and it remains 0 if the partner doesn't send it.
1716 // It effectively means that we don't track partner's unsent updates
1717 // as in the earlier HA versions.
1718 auto unsent_update_count = args->get("unsent-update-count");
1719 if (unsent_update_count) {
1720 if (unsent_update_count->getType() != Element::integer) {
1721 isc_throw(CtrlChannelError, "unsent-update-count returned in"
1722 " the ha-heartbeat response is not an integer");
1723 }
1724 communication_state_->setPartnerUnsentUpdateCount(static_cast<uint64_t>
1725 (unsent_update_count->intValue()));
1726 }
1727
1728 } catch (const std::exception& ex) {
1730 .arg(partner_config->getLogLabel())
1731 .arg(ex.what());
1732 heartbeat_success = false;
1733 }
1734 }
1735
1736 // If heartbeat was successful, let's mark the connection with the
1737 // peer as healthy.
1738 if (heartbeat_success) {
1739 communication_state_->poke();
1740
1741 } else {
1742 // We were unable to retrieve partner's state, so let's mark it
1743 // as unavailable.
1744 communication_state_->setPartnerState("unavailable");
1745 // Log if the communication is interrupted.
1746 if (communication_state_->isCommunicationInterrupted()) {
1748 .arg(partner_config->getName());
1749 }
1750 }
1751
1752 startHeartbeat();
1753 // Even though the partner notified us about the synchronization completion,
1754 // we still can't communicate with the partner. Let's continue serving
1755 // the clients until the link is fixed.
1756 if (sync_complete_notified && !heartbeat_success) {
1757 postNextEvent(HA_SYNCED_PARTNER_UNAVAILABLE_EVT);
1758 }
1759 // Whatever the result of the heartbeat was, the state machine needs
1760 // to react to this. Let's run the state machine until the state machine
1761 // finds that some new events are required, i.e. next heartbeat or
1762 // lease update. The runModel() may transition to another state, schedule
1763 // asynchronous tasks etc. Then it returns control to the DHCP server.
1764 runModel(HA_HEARTBEAT_COMPLETE_EVT);
1765 },
1767 std::bind(&HAService::clientConnectHandler, this, ph::_1, ph::_2),
1768 std::bind(&HAService::clientHandshakeHandler, this, ph::_1),
1769 std::bind(&HAService::clientCloseHandler, this, ph::_1)
1770 );
1771}
1772
1773void
1774HAService::scheduleHeartbeat() {
1775 if (!communication_state_->isHeartbeatRunning()) {
1776 startHeartbeat();
1777 }
1778}
1779
1780void
1781HAService::startHeartbeat() {
1782 if (config_->getHeartbeatDelay() > 0) {
1783 communication_state_->startHeartbeat(config_->getHeartbeatDelay(),
1784 std::bind(&HAService::asyncSendHeartbeat,
1785 this));
1786 }
1787}
1788
1789void
1790HAService::asyncDisableDHCPService(HttpClient& http_client,
1791 const std::string& server_name,
1792 const unsigned int max_period,
1793 PostRequestCallback post_request_action) {
1794 HAConfig::PeerConfigPtr remote_config = config_->getPeerConfig(server_name);
1795
1796 // Create HTTP/1.1 request including our command.
1797 PostHttpRequestJsonPtr request = boost::make_shared<PostHttpRequestJson>
1798 (HttpRequest::Method::HTTP_POST, "/", HttpVersion::HTTP_11(),
1799 HostHttpHeader(remote_config->getUrl().getHostname()));
1800
1801 remote_config->addBasicAuthHttpHeader(request);
1802 request->setBodyAsJson(CommandCreator::createDHCPDisable(max_period,
1803 server_type_));
1804 request->finalize();
1805
1806 // Response object should also be created because the HTTP client needs
1807 // to know the type of the expected response.
1808 HttpResponseJsonPtr response = boost::make_shared<HttpResponseJson>();
1809
1810 // Schedule asynchronous HTTP request.
1811 http_client.asyncSendRequest(remote_config->getUrl(),
1812 remote_config->getTlsContext(),
1813 request, response,
1814 [this, remote_config, post_request_action]
1815 (const boost::system::error_code& ec,
1816 const HttpResponsePtr& response,
1817 const std::string& error_str) {
1818
1819 // There are three possible groups of errors during the heartbeat.
1820 // One is the IO error causing issues in communication with the peer.
1821 // Another one is an HTTP parsing error. The last type of error is
1822 // when non-success error code is returned in the response carried
1823 // in the HTTP message or if the JSON response is otherwise broken.
1824
1825 int rcode = 0;
1826 std::string error_message;
1827
1828 // Handle first two groups of errors.
1829 if (ec || !error_str.empty()) {
1830 error_message = (ec ? ec.message() : error_str);
1831 LOG_ERROR(ha_logger, HA_DHCP_DISABLE_COMMUNICATIONS_FAILED)
1832 .arg(remote_config->getLogLabel())
1833 .arg(error_message);
1834
1835 } else {
1836
1837 // Handle third group of errors.
1838 try {
1839 static_cast<void>(verifyAsyncResponse(response, rcode));
1840
1841 } catch (const std::exception& ex) {
1842 error_message = ex.what();
1844 .arg(remote_config->getLogLabel())
1845 .arg(error_message);
1846 }
1847 }
1848
1849 // If there was an error communicating with the partner, mark the
1850 // partner as unavailable.
1851 if (!error_message.empty()) {
1852 communication_state_->setPartnerState("unavailable");
1853 }
1854
1855 // Invoke post request action if it was specified.
1856 if (post_request_action) {
1857 post_request_action(error_message.empty(),
1858 error_message,
1859 rcode);
1860 }
1861 },
1863 std::bind(&HAService::clientConnectHandler, this, ph::_1, ph::_2),
1864 std::bind(&HAService::clientHandshakeHandler, this, ph::_1),
1865 std::bind(&HAService::clientCloseHandler, this, ph::_1)
1866 );
1867}
1868
1869void
1870HAService::asyncEnableDHCPService(HttpClient& http_client,
1871 const std::string& server_name,
1872 PostRequestCallback post_request_action) {
1873 HAConfig::PeerConfigPtr remote_config = config_->getPeerConfig(server_name);
1874
1875 // Create HTTP/1.1 request including our command.
1876 PostHttpRequestJsonPtr request = boost::make_shared<PostHttpRequestJson>
1877 (HttpRequest::Method::HTTP_POST, "/", HttpVersion::HTTP_11(),
1878 HostHttpHeader(remote_config->getUrl().getHostname()));
1879 remote_config->addBasicAuthHttpHeader(request);
1880 request->setBodyAsJson(CommandCreator::createDHCPEnable(server_type_));
1881 request->finalize();
1882
1883 // Response object should also be created because the HTTP client needs
1884 // to know the type of the expected response.
1885 HttpResponseJsonPtr response = boost::make_shared<HttpResponseJson>();
1886
1887 // Schedule asynchronous HTTP request.
1888 http_client.asyncSendRequest(remote_config->getUrl(),
1889 remote_config->getTlsContext(),
1890 request, response,
1891 [this, remote_config, post_request_action]
1892 (const boost::system::error_code& ec,
1893 const HttpResponsePtr& response,
1894 const std::string& error_str) {
1895
1896 // There are three possible groups of errors during the heartbeat.
1897 // One is the IO error causing issues in communication with the peer.
1898 // Another one is an HTTP parsing error. The last type of error is
1899 // when non-success error code is returned in the response carried
1900 // in the HTTP message or if the JSON response is otherwise broken.
1901
1902 int rcode = 0;
1903 std::string error_message;
1904
1905 // Handle first two groups of errors.
1906 if (ec || !error_str.empty()) {
1907 error_message = (ec ? ec.message() : error_str);
1908 LOG_ERROR(ha_logger, HA_DHCP_ENABLE_COMMUNICATIONS_FAILED)
1909 .arg(remote_config->getLogLabel())
1910 .arg(error_message);
1911
1912 } else {
1913
1914 // Handle third group of errors.
1915 try {
1916 static_cast<void>(verifyAsyncResponse(response, rcode));
1917
1918 } catch (const std::exception& ex) {
1919 error_message = ex.what();
1921 .arg(remote_config->getLogLabel())
1922 .arg(error_message);
1923 }
1924 }
1925
1926 // If there was an error communicating with the partner, mark the
1927 // partner as unavailable.
1928 if (!error_message.empty()) {
1929 communication_state_->setPartnerState("unavailable");
1930 }
1931
1932 // Invoke post request action if it was specified.
1933 if (post_request_action) {
1934 post_request_action(error_message.empty(),
1935 error_message,
1936 rcode);
1937 }
1938 },
1940 std::bind(&HAService::clientConnectHandler, this, ph::_1, ph::_2),
1941 std::bind(&HAService::clientHandshakeHandler, this, ph::_1),
1942 std::bind(&HAService::clientCloseHandler, this, ph::_1)
1943 );
1944}
1945
1946void
1947HAService::localDisableDHCPService() {
1948 network_state_->disableService(NetworkState::Origin::HA_COMMAND);
1949}
1950
1951void
1952HAService::localEnableDHCPService() {
1953 network_state_->enableService(NetworkState::Origin::HA_COMMAND);
1954}
1955
1956void
1957HAService::asyncSyncLeases() {
1958 PostSyncCallback null_action;
1959
1960 // Timeout is configured in milliseconds. Need to convert to seconds.
1961 unsigned int dhcp_disable_timeout =
1962 static_cast<unsigned int>(config_->getSyncTimeout() / 1000);
1963 if (dhcp_disable_timeout == 0) {
1964 // Ensure that we always use at least 1 second timeout.
1965 dhcp_disable_timeout = 1;
1966 }
1967
1968 asyncSyncLeases(*client_, config_->getFailoverPeerConfig()->getName(),
1969 dhcp_disable_timeout, LeasePtr(), null_action);
1970}
1971
1972void
1973HAService::asyncSyncLeases(http::HttpClient& http_client,
1974 const std::string& server_name,
1975 const unsigned int max_period,
1976 const dhcp::LeasePtr& last_lease,
1977 PostSyncCallback post_sync_action,
1978 const bool dhcp_disabled) {
1979 // Synchronization starts with a command to disable DHCP service of the
1980 // peer from which we're fetching leases. We don't want the other server
1981 // to allocate new leases while we fetch from it. The DHCP service will
1982 // be disabled for a certain amount of time and will be automatically
1983 // re-enabled if we die during the synchronization.
1984 asyncDisableDHCPService(http_client, server_name, max_period,
1985 [this, &http_client, server_name, max_period, last_lease,
1986 post_sync_action, dhcp_disabled]
1987 (const bool success, const std::string& error_message, const int) {
1988
1989 // If we have successfully disabled the DHCP service on the peer,
1990 // we can start fetching the leases.
1991 if (success) {
1992 // The last argument indicates that disabling the DHCP
1993 // service on the partner server was successful.
1994 asyncSyncLeasesInternal(http_client, server_name, max_period,
1995 last_lease, post_sync_action, true);
1996
1997 } else {
1998 post_sync_action(success, error_message, dhcp_disabled);
1999 }
2000 });
2001}
2002
2003void
2004HAService::asyncSyncLeasesInternal(http::HttpClient& http_client,
2005 const std::string& server_name,
2006 const unsigned int max_period,
2007 const dhcp::LeasePtr& last_lease,
2008 PostSyncCallback post_sync_action,
2009 const bool dhcp_disabled) {
2010
2011 HAConfig::PeerConfigPtr partner_config = config_->getFailoverPeerConfig();
2012
2013 // Create HTTP/1.1 request including our command.
2014 PostHttpRequestJsonPtr request = boost::make_shared<PostHttpRequestJson>
2015 (HttpRequest::Method::HTTP_POST, "/", HttpVersion::HTTP_11(),
2016 HostHttpHeader(partner_config->getUrl().getHostname()));
2017 partner_config->addBasicAuthHttpHeader(request);
2018 if (server_type_ == HAServerType::DHCPv4) {
2019 request->setBodyAsJson(CommandCreator::createLease4GetPage(
2020 boost::dynamic_pointer_cast<Lease4>(last_lease), config_->getSyncPageLimit()));
2021
2022 } else {
2023 request->setBodyAsJson(CommandCreator::createLease6GetPage(
2024 boost::dynamic_pointer_cast<Lease6>(last_lease), config_->getSyncPageLimit()));
2025 }
2026 request->finalize();
2027
2028 // Response object should also be created because the HTTP client needs
2029 // to know the type of the expected response.
2030 HttpResponseJsonPtr response = boost::make_shared<HttpResponseJson>();
2031
2032 // Schedule asynchronous HTTP request.
2033 http_client.asyncSendRequest(partner_config->getUrl(),
2034 partner_config->getTlsContext(),
2035 request, response,
2036 [this, partner_config, post_sync_action, &http_client, server_name,
2037 max_period, dhcp_disabled]
2038 (const boost::system::error_code& ec,
2039 const HttpResponsePtr& response,
2040 const std::string& error_str) {
2041
2042 // Holds last lease received on the page of leases. If the last
2043 // page was hit, this value remains null.
2044 LeasePtr last_lease;
2045
2046 // There are three possible groups of errors during the heartbeat.
2047 // One is the IO error causing issues in communication with the peer.
2048 // Another one is an HTTP parsing error. The last type of error is
2049 // when non-success error code is returned in the response carried
2050 // in the HTTP message or if the JSON response is otherwise broken.
2051
2052 std::string error_message;
2053
2054 // Handle first two groups of errors.
2055 if (ec || !error_str.empty()) {
2056 error_message = (ec ? ec.message() : error_str);
2057 LOG_ERROR(ha_logger, HA_LEASES_SYNC_COMMUNICATIONS_FAILED)
2058 .arg(partner_config->getLogLabel())
2059 .arg(error_message);
2060
2061 } else {
2062 // Handle third group of errors.
2063 try {
2064 int rcode = 0;
2065 ConstElementPtr args = verifyAsyncResponse(response, rcode);
2066
2067 // Arguments must be a map.
2068 if (args && (args->getType() != Element::map)) {
2069 isc_throw(CtrlChannelError,
2070 "arguments in the received response must be a map");
2071 }
2072
2073 ConstElementPtr leases = args->get("leases");
2074 if (!leases || (leases->getType() != Element::list)) {
2075 isc_throw(CtrlChannelError,
2076 "server response does not contain leases argument or this"
2077 " argument is not a list");
2078 }
2079
2080 // Iterate over the leases and update the database as appropriate.
2081 const auto& leases_element = leases->listValue();
2082
2083 LOG_INFO(ha_logger, HA_LEASES_SYNC_LEASE_PAGE_RECEIVED)
2084 .arg(leases_element.size())
2085 .arg(server_name);
2086
2087 for (auto l = leases_element.begin(); l != leases_element.end(); ++l) {
2088 try {
2089
2090 if (server_type_ == HAServerType::DHCPv4) {
2091 Lease4Ptr lease = Lease4::fromElement(*l);
2092
2093 // Check if there is such lease in the database already.
2094 Lease4Ptr existing_lease = LeaseMgrFactory::instance().getLease4(lease->addr_);
2095 if (!existing_lease) {
2096 // There is no such lease, so let's add it.
2097 LeaseMgrFactory::instance().addLease(lease);
2098
2099 } else if (existing_lease->cltt_ < lease->cltt_) {
2100 // If the existing lease is older than the fetched lease, update
2101 // the lease in our local database.
2102 // Update lease current expiration time with value received from the
2103 // database. Some database backends reject operations on the lease if
2104 // the current expiration time value does not match what is stored.
2105 Lease::syncCurrentExpirationTime(*existing_lease, *lease);
2106 LeaseMgrFactory::instance().updateLease4(lease);
2107
2108 } else {
2109 LOG_DEBUG(ha_logger, DBGLVL_TRACE_BASIC, HA_LEASE_SYNC_STALE_LEASE4_SKIP)
2110 .arg(lease->addr_.toText())
2111 .arg(lease->subnet_id_);
2112 }
2113
2114 // If we're not on the last page and we're processing final lease on
2115 // this page, let's record the lease as input to the next
2116 // lease4-get-page command.
2117 if ((leases_element.size() >= config_->getSyncPageLimit()) &&
2118 (l + 1 == leases_element.end())) {
2119 last_lease = boost::dynamic_pointer_cast<Lease>(lease);
2120 }
2121
2122 } else {
2123 Lease6Ptr lease = Lease6::fromElement(*l);
2124
2125 // Check if there is such lease in the database already.
2126 Lease6Ptr existing_lease = LeaseMgrFactory::instance().getLease6(lease->type_,
2127 lease->addr_);
2128 if (!existing_lease) {
2129 // There is no such lease, so let's add it.
2130 LeaseMgrFactory::instance().addLease(lease);
2131
2132 } else if (existing_lease->cltt_ < lease->cltt_) {
2133 // If the existing lease is older than the fetched lease, update
2134 // the lease in our local database.
2135 // Update lease current expiration time with value received from the
2136 // database. Some database backends reject operations on the lease if
2137 // the current expiration time value does not match what is stored.
2138 Lease::syncCurrentExpirationTime(*existing_lease, *lease);
2139 LeaseMgrFactory::instance().updateLease6(lease);
2140
2141 } else {
2142 LOG_DEBUG(ha_logger, DBGLVL_TRACE_BASIC, HA_LEASE_SYNC_STALE_LEASE6_SKIP)
2143 .arg(lease->addr_.toText())
2144 .arg(lease->subnet_id_);
2145 }
2146
2147 // If we're not on the last page and we're processing final lease on
2148 // this page, let's record the lease as input to the next
2149 // lease6-get-page command.
2150 if ((leases_element.size() >= config_->getSyncPageLimit()) &&
2151 (l + 1 == leases_element.end())) {
2152 last_lease = boost::dynamic_pointer_cast<Lease>(lease);
2153 }
2154 }
2155
2156 } catch (const std::exception& ex) {
2157 LOG_WARN(ha_logger, HA_LEASE_SYNC_FAILED)
2158 .arg((*l)->str())
2159 .arg(ex.what());
2160 }
2161 }
2162
2163 } catch (const std::exception& ex) {
2164 error_message = ex.what();
2166 .arg(partner_config->getLogLabel())
2167 .arg(error_message);
2168 }
2169 }
2170
2171 // If there was an error communicating with the partner, mark the
2172 // partner as unavailable.
2173 if (!error_message.empty()) {
2174 communication_state_->setPartnerState("unavailable");
2175
2176 } else if (last_lease) {
2177 // This indicates that there are more leases to be fetched.
2178 // Therefore, we have to send another leaseX-get-page command.
2179 asyncSyncLeases(http_client, server_name, max_period, last_lease,
2180 post_sync_action, dhcp_disabled);
2181 return;
2182 }
2183
2184 // Invoke post synchronization action if it was specified.
2185 if (post_sync_action) {
2186 post_sync_action(error_message.empty(),
2187 error_message,
2188 dhcp_disabled);
2189 }
2190 },
2191 HttpClient::RequestTimeout(config_->getSyncTimeout()),
2192 std::bind(&HAService::clientConnectHandler, this, ph::_1, ph::_2),
2193 std::bind(&HAService::clientHandshakeHandler, this, ph::_1),
2194 std::bind(&HAService::clientCloseHandler, this, ph::_1)
2195 );
2196
2197}
2198
// Runs synchronize() against the named server and wraps the resulting status
// code and status message into a command answer via createAnswer().
//
// server_name: forwarded unchanged to synchronize().
// max_period: forwarded unchanged to synchronize().
// Returns the value produced by createAnswer().
// NOTE(review): the return-type line that precedes the function name is not
// present in this listing - confirm it against the header.
2200HAService::processSynchronize(const std::string& server_name,
2201 const unsigned int max_period) {
2202 std::string answer_message;
2203 int sync_status = synchronize(answer_message, server_name, max_period);
2204 return (createAnswer(sync_status, answer_message));
2205}
2206
// Blocking lease synchronization with the named server.
//
// A function-local IOService and HttpClient are created, asyncSyncLeases() is
// started on them and io_service.run() blocks until one of the completion
// callbacks calls io_service.stop(). All lambdas capture by reference, which
// relies on this function not returning before the IO service is stopped.
//
// status_message: receives the first error message recorded by any of the
//                 callbacks, or the fixed success text when none was recorded.
// server_name:    server to synchronize with; forwarded to the async calls.
// max_period:     forwarded unchanged to asyncSyncLeases().
// Returns CONTROL_RESULT_ERROR (after posting HA_SYNCING_FAILED_EVT) when
// status_message is non-empty once the IO service stops, otherwise
// CONTROL_RESULT_SUCCESS (after posting HA_SYNCING_SUCCEEDED_EVT).
//
// NOTE(review): this listing appears to omit a few source lines (the branch
// condition that opens the nested asyncEnableDHCPService() call and the
// statements that the dangling `.arg(...)` chains belong to) - consult the
// original file before changing the control flow.
2207int
2208HAService::synchronize(std::string& status_message, const std::string& server_name,
2209 const unsigned int max_period) {
2210 IOService io_service;
2211 HttpClient client(io_service);
2212
2213 asyncSyncLeases(client, server_name, max_period, Lease4Ptr(),
2214 [&](const bool success, const std::string& error_message,
2215 const bool dhcp_disabled) {
2216 // If there was a fatal error while fetching the leases, let's
2217 // log an error message so as it can be included in the response
2218 // to the controlling client.
2219 if (!success) {
2220 status_message = error_message;
2221 }
2222
2223 // Whether or not there was an error while fetching the leases,
2224 // we need to re-enable the DHCP service on the peer if the
2225 // DHCP service was disabled in the course of synchronization.
2226 if (dhcp_disabled) {
2227 // If the synchronization was completed successfully let's
2228 // try to send the ha-sync-complete-notify command to the
2229 // partner.
2230 if (success) {
2231 asyncSyncCompleteNotify(client, server_name,
2232 [&](const bool success,
2233 const std::string& error_message,
2234 const int rcode) {
2235 // This command may not be supported by the partner when it
2236 // runs an older Kea version. In that case, send the dhcp-enable
2237 // command as in previous Kea version.
2239 asyncEnableDHCPService(client, server_name,
2240 [&](const bool success,
2241 const std::string& error_message,
2242 const int) {
2243 // It is possible that we have already recorded an error
2244 // message while synchronizing the lease database. Don't
2245 // override the existing error message.
2246 if (!success && status_message.empty()) {
2247 status_message = error_message;
2248 }
2249
2250 // The synchronization process is completed, so let's break
2251 // the IO service so as we can return the response to the
2252 // controlling client.
2253 io_service.stop();
2254 });
2255
2256 } else {
2257 // ha-sync-complete-notify command was delivered to the partner.
2258 // The synchronization process ends here.
2259 if (!success && status_message.empty()) {
2260 status_message = error_message;
2261 }
2262
2263 io_service.stop();
2264 }
2265 });
2266
2267 } else {
2268 // Synchronization was unsuccessful. Send the dhcp-enable command to
2269 // re-enable the DHCP service. Note, that we don't send the
2270 // ha-sync-complete-notify command in this case. It is only sent in
2271 // the case when synchronization ends successfully.
2272 asyncEnableDHCPService(client, server_name,
2273 [&](const bool success,
2274 const std::string& error_message,
2275 const int) {
2276 if (!success && status_message.empty()) {
2277 status_message = error_message;
2278 }
2279
2280 // The synchronization process is completed, so let's break
2281 // the IO service so as we can return the response to the
2282 // controlling client.
2283 io_service.stop();
2284
2285 });
2286 }
2287
2288 } else {
2289 // Also stop IO service if there is no need to enable DHCP
2290 // service.
2291 io_service.stop();
2292 }
2293 });
2294
2295 LOG_INFO(ha_logger, HA_SYNC_START).arg(server_name);
2296
2297 // Measure duration of the synchronization.
2298 Stopwatch stopwatch;
2299
2300 // Run the IO service until it is stopped by any of the callbacks. This
2301 // makes it synchronous.
2302 io_service.run();
2303
2304 // End measuring duration.
2305 stopwatch.stop();
2306
2307 // If an error message has been recorded, return an error to the controlling
2308 // client.
2309 if (!status_message.empty()) {
2310 postNextEvent(HA_SYNCING_FAILED_EVT);
2311
2313 .arg(server_name)
2314 .arg(status_message);
2315
2316 return (CONTROL_RESULT_ERROR);
2317
2318 }
2319
2320 // Everything was fine, so let's return a success.
2321 status_message = "Lease database synchronization complete.";
2322 postNextEvent(HA_SYNCING_SUCCEEDED_EVT);
2323
2325 .arg(server_name)
2326 .arg(stopwatch.logFormatLastDuration());
2327
2328 return (CONTROL_RESULT_SUCCESS);
2329}
2330
// Sends the lease updates held in lease_update_backlog_ to the given peer.
//
// When the backlog is empty, post_request_action is invoked immediately with
// (true, "", CONTROL_RESULT_SUCCESS). Otherwise one command is built and
// posted: for DHCPv4 a single lease is popped from the backlog and turned into
// an update or delete command; for DHCPv6 one bulk-apply command is created
// from the backlog. When the response carries no error the function calls
// itself again; on error post_request_action receives the error message and
// the result code.
//
// http_client:         client used to send the request. The completion lambda
//                      captures it by reference, so it must outlive the whole
//                      chain of requests.
// config:              peer configuration supplying URL, TLS context, basic
//                      auth header and log label.
// post_request_action: completion callback; called unconditionally, so it
//                      must be callable.
//
// NOTE(review): this listing appears to omit the declaration of `op_type` and
// the statement that the dangling `.arg(...)` chain in the catch block belongs
// to - consult the original file.
2331void
2332HAService::asyncSendLeaseUpdatesFromBacklog(HttpClient& http_client,
2333 const HAConfig::PeerConfigPtr& config,
2334 PostRequestCallback post_request_action) {
2335 if (lease_update_backlog_.size() == 0) {
2336 post_request_action(true, "", CONTROL_RESULT_SUCCESS);
2337 return;
2338 }
2339
2340 ConstElementPtr command;
2341 if (server_type_ == HAServerType::DHCPv4) {
2343 Lease4Ptr lease = boost::dynamic_pointer_cast<Lease4>(lease_update_backlog_.pop(op_type));
2344 if (op_type == LeaseUpdateBacklog::ADD) {
2345 command = CommandCreator::createLease4Update(*lease);
2346 } else {
2347 command = CommandCreator::createLease4Delete(*lease);
2348 }
2349
2350 } else {
2351 command = CommandCreator::createLease6BulkApply(lease_update_backlog_);
2352 }
2353
2354 // Create HTTP/1.1 request including our command.
2355 PostHttpRequestJsonPtr request = boost::make_shared<PostHttpRequestJson>
2356 (HttpRequest::Method::HTTP_POST, "/", HttpVersion::HTTP_11(),
2357 HostHttpHeader(config->getUrl().getHostname()));
2358 config->addBasicAuthHttpHeader(request);
2359 request->setBodyAsJson(command);
2360 request->finalize();
2361
2362 // Response object should also be created because the HTTP client needs
2363 // to know the type of the expected response.
2364 HttpResponseJsonPtr response = boost::make_shared<HttpResponseJson>();
2365
2366 http_client.asyncSendRequest(config->getUrl(), config->getTlsContext(),
2367 request, response,
2368 [this, &http_client, config, post_request_action]
2369 (const boost::system::error_code& ec,
2370 const HttpResponsePtr& response,
2371 const std::string& error_str) {
2372
2373 int rcode = 0;
2374 std::string error_message;
2375
2376 if (ec || !error_str.empty()) {
2377 error_message = (ec ? ec.message() : error_str);
2378 LOG_WARN(ha_logger, HA_LEASES_BACKLOG_COMMUNICATIONS_FAILED)
2379 .arg(config->getLogLabel())
2380 .arg(ec ? ec.message() : error_str);
2381
2382 } else {
2383 // Handle third group of errors.
2384 try {
2385 auto args = verifyAsyncResponse(response, rcode);
2386 } catch (const std::exception& ex) {
2387 error_message = ex.what();
2389 .arg(config->getLogLabel())
2390 .arg(ex.what());
2391 }
2392 }
2393
2394 // Recursively send all outstanding lease updates or break when an
2395 // error occurs. In DHCPv6, this is a single iteration because we use
2396 // lease6-bulk-apply, which combines many lease updates in a single
2397 // transaction. In the case of DHCPv4, each update is sent in its own
2398 // transaction.
2399 if (error_message.empty()) {
2400 asyncSendLeaseUpdatesFromBacklog(http_client, config, post_request_action);
2401 } else {
            // error_message is non-empty on this path, so the first argument
            // is always false here.
2402 post_request_action(error_message.empty(), error_message, rcode);
2403 }
2404 });
2405}
2406
// Blocking wrapper around asyncSendLeaseUpdatesFromBacklog().
//
// Returns true immediately when the backlog is empty. Otherwise a local
// IOService/HttpClient pair is created, the asynchronous send is started
// towards the failover peer and io_service.run() blocks until the completion
// callback stops the IO service.
//
// Returns the `success` flag passed to the completion callback (true when the
// backlog was empty).
//
// NOTE(review): this listing appears to omit the statements that the dangling
// `.arg(...)` chains belong to, and one line inside the empty-backlog branch -
// consult the original file.
2407bool
2408HAService::sendLeaseUpdatesFromBacklog() {
2409 auto num_updates = lease_update_backlog_.size();
2410 if (num_updates == 0) {
2412 return (true);
2413 }
2414
2415 IOService io_service;
2416 HttpClient client(io_service);
2417 auto remote_config = config_->getFailoverPeerConfig();
2418 bool updates_successful = true;
2419
2421 .arg(num_updates)
2422 .arg(remote_config->getName());
2423
    // The callback captures the locals by reference; this is only valid
    // because io_service.run() below does not return until stop() is called.
2424 asyncSendLeaseUpdatesFromBacklog(client, remote_config,
2425 [&](const bool success, const std::string&, const int) {
2426 io_service.stop();
2427 updates_successful = success;
2428 });
2429
2430 // Measure duration of the updates.
2431 Stopwatch stopwatch;
2432
2433 // Run the IO service until it is stopped by the callback. This makes it synchronous.
2434 io_service.run();
2435
2436 // End measuring duration.
2437 stopwatch.stop();
2438
2439 if (updates_successful) {
2441 .arg(remote_config->getName())
2442 .arg(stopwatch.logFormatLastDuration());
2443 }
2444
2445 return (updates_successful);
2446}
2447
// Posts the command created by CommandCreator::createHAReset() to the given
// peer and reports the outcome through post_request_action.
//
// http_client:         client used to send the request.
// config:              peer configuration supplying URL, TLS context, basic
//                      auth header and log label.
// post_request_action: called unconditionally from the completion handler
//                      with (error_message.empty(), error_message, rcode), so
//                      it must be callable.
//
// NOTE(review): this listing appears to omit the statement that the dangling
// `.arg(...)` chain in the catch block belongs to - consult the original file.
2448void
2449HAService::asyncSendHAReset(HttpClient& http_client,
2450 const HAConfig::PeerConfigPtr& config,
2451 PostRequestCallback post_request_action) {
2452 ConstElementPtr command = CommandCreator::createHAReset(server_type_);
2453
2454 // Create HTTP/1.1 request including our command.
2455 PostHttpRequestJsonPtr request = boost::make_shared<PostHttpRequestJson>
2456 (HttpRequest::Method::HTTP_POST, "/", HttpVersion::HTTP_11(),
2457 HostHttpHeader(config->getUrl().getHostname()));
2458 config->addBasicAuthHttpHeader(request);
2459 request->setBodyAsJson(command);
2460 request->finalize();
2461
2462 // Response object should also be created because the HTTP client needs
2463 // to know the type of the expected response.
2464 HttpResponseJsonPtr response = boost::make_shared<HttpResponseJson>();
2465
2466 http_client.asyncSendRequest(config->getUrl(), config->getTlsContext(),
2467 request, response,
2468 [this, config, post_request_action]
2469 (const boost::system::error_code& ec,
2470 const HttpResponsePtr& response,
2471 const std::string& error_str) {
2472
2473 int rcode = 0;
2474 std::string error_message;
2475
        // IO error or HTTP-level error reported by the client.
2476 if (ec || !error_str.empty()) {
2477 error_message = (ec ? ec.message() : error_str);
2478 LOG_WARN(ha_logger, HA_RESET_COMMUNICATIONS_FAILED)
2479 .arg(config->getLogLabel())
2480 .arg(ec ? ec.message() : error_str);
2481
2482 } else {
2483 // Handle third group of errors.
2484 try {
2485 auto args = verifyAsyncResponse(response, rcode);
2486 } catch (const std::exception& ex) {
2487 error_message = ex.what();
2489 .arg(config->getLogLabel())
2490 .arg(ex.what());
2491 }
2492 }
2493
2494 post_request_action(error_message.empty(), error_message, rcode);
2495 });
2496}
2497
2498bool
2499HAService::sendHAReset() {
2500 IOService io_service;
2501 HttpClient client(io_service);
2502 auto remote_config = config_->getFailoverPeerConfig();
2503 bool reset_successful = true;
2504
2505 asyncSendHAReset(client, remote_config,
2506 [&](const bool success, const std::string&, const int) {
2507 io_service.stop();
2508 reset_successful = success;
2509 });
2510
2511 // Run the IO service until it is stopped by the callback. This makes it synchronous.
2512 io_service.run();
2513
2514 return (reset_successful);
2515}
2516
// Applies the given list of scopes via query_filter_.serveScopes() and then
// calls adjustNetworkState().
//
// scopes: scope names passed to the query filter.
// Returns an answer with CONTROL_RESULT_ERROR and the exception text when
// either call throws a std::exception, otherwise an answer with
// CONTROL_RESULT_SUCCESS.
// NOTE(review): the return-type line that precedes the function name is not
// present in this listing - confirm it against the header.
2518HAService::processScopes(const std::vector<std::string>& scopes) {
2519 try {
2520 query_filter_.serveScopes(scopes);
2521 adjustNetworkState();
2522
2523 } catch (const std::exception& ex) {
2524 return (createAnswer(CONTROL_RESULT_ERROR, ex.what()));
2525 }
2526
2527 return (createAnswer(CONTROL_RESULT_SUCCESS, "New HA scopes configured."));
2528}
2529
// Calls unpause() and reports the result as a command answer.
//
// Both outcomes are returned with CONTROL_RESULT_SUCCESS; only the text
// differs depending on whether unpause() returned true.
// NOTE(review): the return-type line that precedes the function name is not
// present in this listing - confirm it against the header.
2531HAService::processContinue() {
2532 if (unpause()) {
2533 return (createAnswer(CONTROL_RESULT_SUCCESS, "HA state machine continues."));
2534 }
2535 return (createAnswer(CONTROL_RESULT_SUCCESS, "HA state machine is not paused."));
2536}
2537
// Handles a maintenance notification for this server.
//
// cancel == true:  only accepted in HA_IN_MAINTENANCE_ST; posts
//                  HA_MAINTENANCE_CANCEL_EVT, transitions back to the previous
//                  state and runs the model. Any other state yields a
//                  CONTROL_RESULT_ERROR answer.
// cancel == false: for the states listed in the switch the special result
//                  HA_CONTROL_RESULT_MAINTENANCE_NOT_ALLOWED is returned;
//                  otherwise the server transitions to HA_IN_MAINTENANCE_ST
//                  and a CONTROL_RESULT_SUCCESS answer is returned.
//
// NOTE(review): the return-type line and at least one `case` label of the
// switch are not present in this listing - consult the original file.
2539HAService::processMaintenanceNotify(const bool cancel) {
2540 if (cancel) {
2541 if (getCurrState() != HA_IN_MAINTENANCE_ST) {
2542 return (createAnswer(CONTROL_RESULT_ERROR, "Unable to cancel the"
2543 " maintenance for the server not in the"
2544 " in-maintenance state."));
2545 }
2546
2547 postNextEvent(HA_MAINTENANCE_CANCEL_EVT);
2548 verboseTransition(getPrevState());
2549 runModel(NOP_EVT);
2550 return (createAnswer(CONTROL_RESULT_SUCCESS, "Server maintenance canceled."));
2551 }
2552
2553 switch (getCurrState()) {
2554 case HA_BACKUP_ST:
2556 case HA_TERMINATED_ST:
2557 // The reason why we don't return an error result here is that we have to
2558 // have a way to distinguish between the errors caused by the communication
2559 // issues and the cases when there is no communication error but the server
2560 // is not allowed to enter the in-maintenance state. In the former case, the
2561 // partner would go to partner-down. In the case signaled by the special
2562 // result code entering the maintenance state is not allowed.
2563 return (createAnswer(HA_CONTROL_RESULT_MAINTENANCE_NOT_ALLOWED,
2564 "Unable to transition the server from the "
2565 + stateToString(getCurrState()) + " to"
2566 " in-maintenance state."));
2567 default:
2568 verboseTransition(HA_IN_MAINTENANCE_ST);
2569 runModel(HA_MAINTENANCE_NOTIFY_EVT);
2570 }
2571 return (createAnswer(CONTROL_RESULT_SUCCESS, "Server is in-maintenance state."));
2572}
2573
// Starts maintenance of the partner: sends ha-maintenance-notify (cancel flag
// false) to the failover peer and transitions this server according to the
// partner's response.
//
// Flow:
//  - For the states listed in the first switch a CONTROL_RESULT_ERROR answer
//    is returned without contacting the partner.
//  - The request is sent on a function-local IOService/HttpClient and
//    io_service.run() blocks until the completion handler stops it. The
//    handler stores its results in captured_ec, captured_error_message and
//    captured_rcode, all captured by reference.
//  - captured_ec set or captured_rcode == CONTROL_RESULT_ERROR: transition to
//    HA_PARTNER_DOWN_ST.
//  - captured_rcode == CONTROL_RESULT_SUCCESS: transition to
//    HA_PARTNER_IN_MAINTENANCE_ST.
//  - any other result code: CONTROL_RESULT_ERROR answer including the
//    partner's message.
//
// NOTE(review): when only error_str is set (ec clear) the handler never calls
// verifyAsyncResponse(), so captured_rcode keeps its initial value 0 and
// captured_ec stays clear - confirm which of the branches after run() is
// intended for that case.
// NOTE(review): this listing appears to omit several source lines (the
// return-type line, some `case` labels, the request-timeout argument of
// asyncSendRequest and the heads of two `return (createAnswer(...` statements
// and one log statement) - consult the original file.
2575HAService::processMaintenanceStart() {
2576 switch (getCurrState()) {
2577 case HA_BACKUP_ST:
2580 case HA_TERMINATED_ST:
2581 return (createAnswer(CONTROL_RESULT_ERROR, "Unable to transition the server from"
2582 " the " + stateToString(getCurrState()) + " to"
2583 " partner-in-maintenance state."));
2584 default:
2585 ;
2586 }
2587
2588 HAConfig::PeerConfigPtr remote_config = config_->getFailoverPeerConfig();
2589
2590 // Create HTTP/1.1 request including ha-maintenance-notify command
2591 // with the cancel flag set to false.
2592 PostHttpRequestJsonPtr request = boost::make_shared<PostHttpRequestJson>
2593 (HttpRequest::Method::HTTP_POST, "/", HttpVersion::HTTP_11(),
2594 HostHttpHeader(remote_config->getUrl().getHostname()));
2595 remote_config->addBasicAuthHttpHeader(request);
2596 request->setBodyAsJson(CommandCreator::createMaintenanceNotify(false, server_type_));
2597 request->finalize();
2598
2599 // Response object should also be created because the HTTP client needs
2600 // to know the type of the expected response.
2601 HttpResponseJsonPtr response = boost::make_shared<HttpResponseJson>();
2602
2603 IOService io_service;
2604 HttpClient client(io_service);
2605
2606 boost::system::error_code captured_ec;
2607 std::string captured_error_message;
2608 int captured_rcode = 0;
2609
2610 // Schedule asynchronous HTTP request.
2611 client.asyncSendRequest(remote_config->getUrl(),
2612 remote_config->getTlsContext(),
2613 request, response,
2614 [this, remote_config, &io_service, &captured_ec, &captured_error_message,
2615 &captured_rcode]
2616 (const boost::system::error_code& ec,
2617 const HttpResponsePtr& response,
2618 const std::string& error_str) {
2619
2620 io_service.stop();
2621
2622 // There are three possible groups of errors. One is the IO error
2623 // causing issues in communication with the peer. Another one is
2624 // an HTTP parsing error. The last type of error is when non-success
2625 // error code is returned in the response carried in the HTTP message
2626 // or if the JSON response is otherwise broken.
2627
2628 std::string error_message;
2629
2630 // Handle first two groups of errors.
2631 if (ec || !error_str.empty()) {
2632 error_message = (ec ? ec.message() : error_str);
2633 LOG_ERROR(ha_logger, HA_MAINTENANCE_NOTIFY_COMMUNICATIONS_FAILED)
2634 .arg(remote_config->getLogLabel())
2635 .arg(error_message);
2636
2637 } else {
2638
2639 // Handle third group of errors.
2640 try {
2641 static_cast<void>(verifyAsyncResponse(response, captured_rcode));
2642
2643 } catch (const std::exception& ex) {
2644 error_message = ex.what();
2646 .arg(remote_config->getLogLabel())
2647 .arg(error_message);
2648 }
2649 }
2650
2651 // If there was an error communicating with the partner, mark the
2652 // partner as unavailable.
2653 if (!error_message.empty()) {
2654 communication_state_->setPartnerState("unavailable");
2655 }
2656
2657 captured_ec = ec;
2658 captured_error_message = error_message;
2659 },
2661 std::bind(&HAService::clientConnectHandler, this, ph::_1, ph::_2),
2662 std::bind(&HAService::clientHandshakeHandler, this, ph::_1),
2663 std::bind(&HAService::clientCloseHandler, this, ph::_1)
2664 );
2665
2666 // Run the IO service until it is stopped by any of the callbacks. This
2667 // makes it synchronous.
2668 io_service.run();
2669
2670 // If there was a communication problem with the partner we assume that
2671 // the partner is already down while we receive this command.
2672 if (captured_ec || (captured_rcode == CONTROL_RESULT_ERROR)) {
2673 postNextEvent(HA_MAINTENANCE_START_EVT);
2674 verboseTransition(HA_PARTNER_DOWN_ST);
2675 runModel(NOP_EVT);
2677 "Server is now in the partner-down state as its"
2678 " partner appears to be offline for maintenance."));
2679
2680 } else if (captured_rcode == CONTROL_RESULT_SUCCESS) {
2681 // If the partner responded indicating no error it means that the
2682 // partner has been transitioned to the in-maintenance state. In that
2683 // case we transition to the partner-in-maintenance state.
2684 postNextEvent(HA_MAINTENANCE_START_EVT);
2685 verboseTransition(HA_PARTNER_IN_MAINTENANCE_ST);
2686 runModel(NOP_EVT);
2687
2688 } else {
2689 // Partner server returned a special status code which means that it can't
2690 // transition to the partner-in-maintenance state.
2691 return (createAnswer(CONTROL_RESULT_ERROR, "Unable to transition to the"
2692 " partner-in-maintenance state. The partner server responded"
2693 " with the following message to the ha-maintenance-notify"
2694 " command: " + captured_error_message + "."));
2695
2696 }
2697
2699 "Server is now in the partner-in-maintenance state"
2700 " and its partner is in-maintenance state. The partner"
2701 " can be now safely shut down."));
2702}
2703
// Cancels partner maintenance: sends ha-maintenance-notify (cancel flag true)
// to the failover peer and, when that succeeds, reverts this server to its
// previous state.
//
// Flow:
//  - Only accepted in HA_PARTNER_IN_MAINTENANCE_ST; any other state yields a
//    CONTROL_RESULT_ERROR answer without contacting the partner.
//  - The request is sent on a function-local IOService/HttpClient and
//    io_service.run() blocks until the completion handler stops it. The
//    handler writes into the local error_message, captured by reference.
//  - A non-empty error_message after run() produces an answer quoting that
//    message and leaves the state unchanged.
//  - Otherwise HA_MAINTENANCE_CANCEL_EVT is posted and the server transitions
//    to getPrevState().
//
// NOTE(review): this listing appears to omit several source lines (the
// return-type line, the request-timeout argument of asyncSendRequest, the
// heads of two `return (createAnswer(...` statements and one log statement) -
// consult the original file.
2705HAService::processMaintenanceCancel() {
2706 if (getCurrState() != HA_PARTNER_IN_MAINTENANCE_ST) {
2707 return (createAnswer(CONTROL_RESULT_ERROR, "Unable to cancel maintenance"
2708 " request because the server is not in the"
2709 " partner-in-maintenance state."));
2710 }
2711
2712 HAConfig::PeerConfigPtr remote_config = config_->getFailoverPeerConfig();
2713
2714 // Create HTTP/1.1 request including ha-maintenance-notify command
2715 // with the cancel flag set to true.
2716 PostHttpRequestJsonPtr request = boost::make_shared<PostHttpRequestJson>
2717 (HttpRequest::Method::HTTP_POST, "/", HttpVersion::HTTP_11(),
2718 HostHttpHeader(remote_config->getUrl().getHostname()));
2719 remote_config->addBasicAuthHttpHeader(request);
2720 request->setBodyAsJson(CommandCreator::createMaintenanceNotify(true, server_type_));
2721 request->finalize();
2722
2723 // Response object should also be created because the HTTP client needs
2724 // to know the type of the expected response.
2725 HttpResponseJsonPtr response = boost::make_shared<HttpResponseJson>();
2726
2727 IOService io_service;
2728 HttpClient client(io_service);
2729
2730 std::string error_message;
2731
2732 // Schedule asynchronous HTTP request.
2733 client.asyncSendRequest(remote_config->getUrl(),
2734 remote_config->getTlsContext(),
2735 request, response,
2736 [this, remote_config, &io_service, &error_message]
2737 (const boost::system::error_code& ec,
2738 const HttpResponsePtr& response,
2739 const std::string& error_str) {
2740
2741 io_service.stop();
2742
2743 // Handle first two groups of errors.
2744 if (ec || !error_str.empty()) {
2745 error_message = (ec ? ec.message() : error_str);
2746 LOG_ERROR(ha_logger, HA_MAINTENANCE_NOTIFY_CANCEL_COMMUNICATIONS_FAILED)
2747 .arg(remote_config->getLogLabel())
2748 .arg(error_message);
2749
2750 } else {
2751
2752 // Handle third group of errors.
2753 try {
2754 int rcode = 0;
2755 static_cast<void>(verifyAsyncResponse(response, rcode));
2756
2757 } catch (const std::exception& ex) {
2758 error_message = ex.what();
2760 .arg(remote_config->getLogLabel())
2761 .arg(error_message);
2762 }
2763 }
2764
2765 // If there was an error communicating with the partner, mark the
2766 // partner as unavailable.
2767 if (!error_message.empty()) {
2768 communication_state_->setPartnerState("unavailable");
2769 }
2770 },
2772 std::bind(&HAService::clientConnectHandler, this, ph::_1, ph::_2),
2773 std::bind(&HAService::clientHandshakeHandler, this, ph::_1),
2774 std::bind(&HAService::clientCloseHandler, this, ph::_1)
2775 );
2776
2777 // Run the IO service until it is stopped by any of the callbacks. This
2778 // makes it synchronous.
2779 io_service.run();
2780
2781 // There was an error in communication with the partner or the
2782 // partner was unable to revert its state.
2783 if (!error_message.empty()) {
2785 "Unable to cancel maintenance. The partner server responded"
2786 " with the following message to the ha-maintenance-notify"
2787 " command: " + error_message + "."));
2788 }
2789
2790 // Successfully reverted partner's state. Let's also revert our state to the
2791 // previous one.
2792 postNextEvent(HA_MAINTENANCE_CANCEL_EVT);
2793 verboseTransition(getPrevState());
2794 runModel(NOP_EVT);
2795
2797 "Server maintenance successfully canceled."));
2798}
2799
// Posts the command created by CommandCreator::createSyncCompleteNotify() to
// the named server and reports the outcome through post_request_action.
//
// http_client:         client used to send the request.
// server_name:         name used to look up the peer configuration via
//                      config_->getPeerConfig().
// post_request_action: optional completion callback; when set it is invoked
//                      with (error_message.empty(), error_message, rcode).
//
// Any non-empty error message marks the partner as "unavailable" in
// communication_state_ before the callback runs.
//
// NOTE(review): the CommandUnsupportedError handler appears empty in this
// listing, which leaves error_message empty on that path; the listing seems to
// omit that handler's body, the head of one log statement and the
// request-timeout argument of asyncSendRequest - consult the original file.
2800void
2801HAService::asyncSyncCompleteNotify(HttpClient& http_client,
2802 const std::string& server_name,
2803 PostRequestCallback post_request_action) {
2804 HAConfig::PeerConfigPtr remote_config = config_->getPeerConfig(server_name);
2805
2806 // Create HTTP/1.1 request including our command.
2807 PostHttpRequestJsonPtr request = boost::make_shared<PostHttpRequestJson>
2808 (HttpRequest::Method::HTTP_POST, "/", HttpVersion::HTTP_11(),
2809 HostHttpHeader(remote_config->getUrl().getHostname()));
2810
2811 remote_config->addBasicAuthHttpHeader(request);
2812 request->setBodyAsJson(CommandCreator::createSyncCompleteNotify(server_type_));
2813 request->finalize();
2814
2815 // Response object should also be created because the HTTP client needs
2816 // to know the type of the expected response.
2817 HttpResponseJsonPtr response = boost::make_shared<HttpResponseJson>();
2818
2819 // Schedule asynchronous HTTP request.
2820 http_client.asyncSendRequest(remote_config->getUrl(),
2821 remote_config->getTlsContext(),
2822 request, response,
2823 [this, remote_config, post_request_action]
2824 (const boost::system::error_code& ec,
2825 const HttpResponsePtr& response,
2826 const std::string& error_str) {
2827
2828 // There are three possible groups of errors. One is the IO error
2829 // causing issues in communication with the peer. Another one is an
2830 // HTTP parsing error. The last type of error is when non-success
2831 // error code is returned in the response carried in the HTTP message
2832 // or if the JSON response is otherwise broken.
2833
2834 int rcode = 0;
2835 std::string error_message;
2836
2837 // Handle first two groups of errors.
2838 if (ec || !error_str.empty()) {
2839 error_message = (ec ? ec.message() : error_str);
2840 LOG_ERROR(ha_logger, HA_SYNC_COMPLETE_NOTIFY_COMMUNICATIONS_FAILED)
2841 .arg(remote_config->getLogLabel())
2842 .arg(error_message);
2843
2844 } else {
2845
2846 // Handle third group of errors.
2847 try {
2848 static_cast<void>(verifyAsyncResponse(response, rcode));
2849
2850 } catch (const CommandUnsupportedError& ex) {
2852
2853 } catch (const std::exception& ex) {
2854 error_message = ex.what();
2856 .arg(remote_config->getLogLabel())
2857 .arg(error_message);
2858 }
2859 }
2860
2861 // If there was an error communicating with the partner, mark the
2862 // partner as unavailable.
2863 if (!error_message.empty()) {
2864 communication_state_->setPartnerState("unavailable");
2865 }
2866
2867 // Invoke post request action if it was specified.
2868 if (post_request_action) {
2869 post_request_action(error_message.empty(),
2870 error_message,
2871 rcode);
2872 }
2873 },
2875 std::bind(&HAService::clientConnectHandler, this, ph::_1, ph::_2),
2876 std::bind(&HAService::clientHandshakeHandler, this, ph::_1),
2877 std::bind(&HAService::clientCloseHandler, this, ph::_1)
2878 );
2879}
2880
// Reacts to the notification that synchronization has completed.
//
// In HA_PARTNER_DOWN_ST only the sync_complete_notified_ flag is set; in any
// other state localEnableDHCPService() is called right away.
//
// NOTE(review): the return-type line and the head of the final
// `return (createAnswer(...` statement are not present in this listing -
// consult the original file.
2882HAService::processSyncCompleteNotify() {
2883 if (getCurrState() == HA_PARTNER_DOWN_ST) {
2884 sync_complete_notified_ = true;
2885 } else {
2886 localEnableDHCPService();
2887 }
2889 "Server successfully notified about the synchronization completion."));
2890}
2891
// Validates an HTTP response to a command and extracts its arguments.
//
// response: response object; must be castable to HttpResponseJson.
// rcode:    output. Set to CONTROL_RESULT_ERROR on entry so that an early
//           throw leaves an error code behind, then overwritten with the
//           result code that parseAnswer() extracts from the first answer.
//
// Returns the value produced by parseAnswer() for the first element of the
// response body.
//
// Throws CtrlChannelError when the response is not a JSON response, has no
// body, the body is neither a list nor a map, the list is empty, or the result
// code is neither CONTROL_RESULT_SUCCESS nor CONTROL_RESULT_EMPTY. A
// CommandUnsupportedError is thrown instead of CtrlChannelError on one branch
// of the final check.
//
// NOTE(review): the return-type line and the condition that selects the
// CommandUnsupportedError branch are not present in this listing - consult the
// original file.
2893HAService::verifyAsyncResponse(const HttpResponsePtr& response, int& rcode) {
2894 // Set the return code to error in case of early throw.
2895 rcode = CONTROL_RESULT_ERROR;
2896 // The response must cast to JSON type.
2897 HttpResponseJsonPtr json_response =
2898 boost::dynamic_pointer_cast<HttpResponseJson>(response);
2899 if (!json_response) {
2900 isc_throw(CtrlChannelError, "no valid HTTP response found");
2901 }
2902
2903 // Body holds the response to our command.
2904 ConstElementPtr body = json_response->getBodyAsJson();
2905 if (!body) {
2906 isc_throw(CtrlChannelError, "no body found in the response");
2907 }
2908
2909 // Body should contain a list of responses from multiple servers.
2910 if (body->getType() != Element::list) {
2911 // Some control agent errors are returned as a map.
        // Such a map is wrapped into a one-element list whose answer carries
        // the current rcode (CONTROL_RESULT_ERROR at this point) and the
        // original text, if any, so the code below can treat both shapes alike.
2912 if (body->getType() == Element::map) {
2913 ElementPtr list = Element::createList();
2914 ElementPtr answer = Element::createMap();
2915 answer->set(CONTROL_RESULT, Element::create(rcode));
2916 ConstElementPtr text = body->get(CONTROL_TEXT);
2917 if (text) {
2918 answer->set(CONTROL_TEXT, text);
2919 }
2920 list->add(answer);
2921 body = list;
2922 } else {
2923 isc_throw(CtrlChannelError, "body of the response must be a list");
2924 }
2925 }
2926
2927 // There must be at least one response.
2928 if (body->empty()) {
2929 isc_throw(CtrlChannelError, "list of responses must not be empty");
2930 }
2931
2932 // Check the status code of the first response. We don't support multiple
2933 // at this time, because we always send a request to a single location.
2934 ConstElementPtr args = parseAnswer(rcode, body->get(0));
2935 if ((rcode != CONTROL_RESULT_SUCCESS) &&
2936 (rcode != CONTROL_RESULT_EMPTY)) {
2937 std::ostringstream s;
2938 // Include an error text if available.
2939 if (args && args->getType() == Element::string) {
2940 s << args->stringValue() << ", ";
2941 }
2942 // Include an error code.
2943 s << "error code " << rcode;
2944
2946 isc_throw(CommandUnsupportedError, s.str());
2947 } else {
2948 isc_throw(CtrlChannelError, s.str());
2949 }
2950 }
2951
2952 return (args);
2953}
2954
2955bool
2956HAService::clientConnectHandler(const boost::system::error_code& ec, int tcp_native_fd) {
2957
2958 // If client is running it's own IOService we do NOT want to
2959 // register the socket with IfaceMgr.
2960 if (client_->getThreadIOService()) {
2961 return (true);
2962 }
2963
2964 // If things look OK register the socket with Interface Manager. Note
2965 // we don't register if the FD is < 0 to avoid an exception throw.
2966 // It is unlikely that this will occur but we want to be liberal
2967 // and avoid issues.
2968 if ((!ec || (ec.value() == boost::asio::error::in_progress))
2969 && (tcp_native_fd >= 0)) {
2970 // External socket callback is a NOP. Ready events handlers are
2971 // run by an explicit call IOService ready in kea-dhcp<n> code.
2972 // We are registering the socket only to interrupt main-thread
2973 // select().
2974 IfaceMgr::instance().addExternalSocket(tcp_native_fd,
2975 std::bind(&HAService::socketReadyHandler, this, ph::_1)
2976 );
2977 }
2978
2979 // If ec.value() == boost::asio::error::already_connected, we should already
2980 // be registered, so nothing to do. If it is any other value, then connect
2981 // failed and Connection logic should handle that, not us, so no matter
2982 // what happens we're returning true.
2983 return (true);
2984}
2985
2986void
2987HAService::socketReadyHandler(int tcp_native_fd) {
2988 // If the socket is ready but does not belong to one of our client's
2989 // ongoing transactions, we close it. This will unregister it from
2990 // IfaceMgr and ensure the client starts over with a fresh connection
2991 // if it needs to do so.
2992 client_->closeIfOutOfBand(tcp_native_fd);
2993}
2994
2995void
2996HAService::clientCloseHandler(int tcp_native_fd) {
2997 if (tcp_native_fd >= 0) {
2998 IfaceMgr::instance().deleteExternalSocket(tcp_native_fd);
2999 }
3000};
3001
3002size_t
3003HAService::pendingRequestSize() {
3004 if (MultiThreadingMgr::instance().getMode()) {
3005 std::lock_guard<std::mutex> lock(mutex_);
3006 return (pending_requests_.size());
3007 } else {
3008 return (pending_requests_.size());
3009 }
3010}
3011
3012template<typename QueryPtrType>
3013int
3014HAService::getPendingRequest(const QueryPtrType& query) {
3015 if (MultiThreadingMgr::instance().getMode()) {
3016 std::lock_guard<std::mutex> lock(mutex_);
3017 return (getPendingRequestInternal(query));
3018 } else {
3019 return (getPendingRequestInternal(query));
3020 }
3021}
3022
3023template<typename QueryPtrType>
3024int
3025HAService::getPendingRequestInternal(const QueryPtrType& query) {
3026 if (pending_requests_.count(query) == 0) {
3027 return (0);
3028 } else {
3029 return (pending_requests_[query]);
3030 }
3031}
3032
// Critical-section callback: calls checkPermissions() on the client and on the
// listener, for whichever of them exists.
//
// isc::MultiThreadingInvalidOperation is rethrown to the caller; any other
// std::exception is caught here and not propagated.
//
// NOTE(review): this listing appears to omit the statements that the two
// dangling `.arg(ex.what())` chains belong to - consult the original file.
3033void
3034HAService::checkPermissionsClientAndListener() {
3035 // Since this function is used as CS callback all exceptions must be
3036 // suppressed (except the @ref MultiThreadingInvalidOperation), unlikely
3037 // though they may be.
3038 // The @ref MultiThreadingInvalidOperation is propagated to the scope of the
3039 // @ref MultiThreadingCriticalSection constructor.
3040 try {
3041 if (client_) {
3042 client_->checkPermissions();
3043 }
3044
3045 if (listener_) {
3046 listener_->checkPermissions();
3047 }
3048 } catch (const isc::MultiThreadingInvalidOperation& ex) {
3050 .arg(ex.what());
3051 // The exception needs to be propagated to the caller of the
3052 // @ref MultiThreadingCriticalSection constructor.
3053 throw;
3054 } catch (const std::exception& ex) {
3056 .arg(ex.what());
3057 }
3058}
3059
3060void
3061HAService::startClientAndListener() {
3062 // Add critical section callbacks.
3063 MultiThreadingMgr::instance().addCriticalSectionCallbacks("HA_MT",
3064 std::bind(&HAService::checkPermissionsClientAndListener, this),
3065 std::bind(&HAService::pauseClientAndListener, this),
3066 std::bind(&HAService::resumeClientAndListener, this));
3067
3068 if (client_) {
3069 client_->start();
3070 }
3071
3072 if (listener_) {
3073 listener_->start();
3074 }
3075}
3076
// Critical-section callback: pauses the client and the listener, for whichever
// of them exists. Any std::exception is caught here and not propagated.
//
// NOTE(review): this listing appears to omit the statement that the dangling
// `.arg(ex.what())` chain belongs to - consult the original file.
3077void
3078HAService::pauseClientAndListener() {
3079 // Since this function is used as CS callback all exceptions must be
3080 // suppressed, unlikely though they may be.
3081 try {
3082 if (client_) {
3083 client_->pause();
3084 }
3085
3086 if (listener_) {
3087 listener_->pause();
3088 }
3089 } catch (const std::exception& ex) {
3091 .arg(ex.what());
3092 }
3093}
3094
// Critical-section callback: resumes the client and the listener, for
// whichever of them exists. Any std::exception is caught here and not
// propagated.
//
// NOTE(review): the handler catches `std::exception&` by non-const reference,
// whereas pauseClientAndListener() catches by const reference - consider
// aligning the two.
// NOTE(review): this listing appears to omit the statement that the dangling
// `.arg(ex.what())` chain belongs to - consult the original file.
3095void
3096HAService::resumeClientAndListener() {
3097 // Since this function is used as CS callback all exceptions must be
3098 // suppressed, unlikely though they may be.
3099 try {
3100 if (client_) {
3101 client_->resume();
3102 }
3103
3104 if (listener_) {
3105 listener_->resume();
3106 }
3107 } catch (std::exception& ex) {
3109 .arg(ex.what());
3110 }
3111}
3112
3113void
3114HAService::stopClientAndListener() {
3115 // Remove critical section callbacks.
3116 MultiThreadingMgr::instance().removeCriticalSectionCallbacks("HA_MT");
3117
3118 if (client_) {
3119 client_->stop();
3120 }
3121
3122 if (listener_) {
3123 listener_->stop();
3124 }
3125}
3126
3127// Explicit instantiations.
// The getPendingRequest() template is defined in this file, so it is
// instantiated here for the Pkt4Ptr and Pkt6Ptr argument types.
3128template int HAService::getPendingRequest(const Pkt4Ptr&);
3129template int HAService::getPendingRequest(const Pkt6Ptr&);
3130
3131} // end of namespace isc::ha
3132} // end of namespace isc
if(!(yy_init))
virtual const char * what() const
Returns a C-style character string of the cause of the exception.
Exception thrown when a worker thread is trying to stop or pause the respective thread pool (which wo...
A generic exception that is thrown when an unexpected error condition occurs.
A multi-threaded HTTP listener that can process API commands requests.
A standard control channel exception that is thrown if there is a problem with one of t...
static data::ConstElementPtr createLease4Delete(const dhcp::Lease4 &lease4)
Creates lease4-del command.
static data::ConstElementPtr createLease4Update(const dhcp::Lease4 &lease4)
Creates lease4-update command.
static data::ConstElementPtr createLease6BulkApply(const dhcp::Lease6CollectionPtr &leases, const dhcp::Lease6CollectionPtr &deleted_leases)
Creates lease6-bulk-apply command.
Holds communication state between DHCPv4 servers.
Holds communication state between DHCPv6 servers.
Role
Server's role in the High Availability setup.
Definition: ha_config.h:70
static std::string roleToString(const HAConfig::PeerConfig::Role &role)
Returns role name.
Definition: ha_config.cc:79
std::map< std::string, PeerConfigPtr > PeerConfigMap
Map of the servers' configurations.
Definition: ha_config.h:232
static std::string HAModeToString(const HAMode &ha_mode)
Returns HA mode name.
Definition: ha_config.cc:224
boost::shared_ptr< PeerConfig > PeerConfigPtr
Pointer to the server's configuration.
Definition: ha_config.h:229
static const int HA_MAINTENANCE_START_EVT
ha-maintenance-start command received.
Definition: ha_service.h:62
bool inScope(dhcp::Pkt4Ptr &query4)
Checks if the DHCPv4 query should be processed by this server.
Definition: ha_service.cc:994
void adjustNetworkState()
Enables or disables network state depending on the served scopes.
Definition: ha_service.cc:1024
void stopClientAndListener()
Stop the client and(or) listener instances.
Definition: ha_service.cc:3114
int getNormalState() const
Returns normal operation state for the current configuration.
Definition: ha_service.cc:952
bool shouldQueueLeaseUpdates(const HAConfig::PeerConfigPtr &peer_config) const
Checks if the lease updates should be queued.
Definition: ha_service.cc:1474
static const int HA_HEARTBEAT_COMPLETE_EVT
Finished heartbeat command.
Definition: ha_service.h:47
bool isMaintenanceCanceled() const
Convenience method checking if the current state is a result of canceling the maintenance.
Definition: ha_service.cc:1096
void asyncSendLeaseUpdate(const QueryPtrType &query, const HAConfig::PeerConfigPtr &config, const data::ConstElementPtr &command, const hooks::ParkingLotHandlePtr &parking_lot)
Asynchronously sends lease update to the peer.
Definition: ha_service.cc:1315
void verboseTransition(const unsigned state)
Transitions to a desired state and logs it.
Definition: ha_service.cc:887
bool sendLeaseUpdatesFromBacklog()
Attempts to send all lease updates from the backlog synchronously.
Definition: ha_service.cc:2408
config::CmdHttpListenerPtr listener_
HTTP listener instance used to receive and respond to HA commands and lease updates.
Definition: ha_service.h:1163
bool leaseUpdateComplete(QueryPtrType &query, const hooks::ParkingLotHandlePtr &parking_lot)
Handle last pending request for this query.
Definition: ha_service.cc:1261
HAConfigPtr config_
Pointer to the HA hooks library configuration.
Definition: ha_service.h:1153
bool shouldTerminate() const
Indicates if the server should transition to the terminated state as a result of high clock skew.
Definition: ha_service.cc:1083
void terminatedStateHandler()
Handler for "terminated" state.
Definition: ha_service.cc:758
dhcp::NetworkStatePtr network_state_
Pointer to the state of the DHCP service (enabled/disabled).
Definition: ha_service.h:1150
HAService(const asiolink::IOServicePtr &io_service, const dhcp::NetworkStatePtr &network_state, const HAConfigPtr &config, const HAServerType &server_type=HAServerType::DHCPv4)
Constructor.
Definition: ha_service.cc:66
void scheduleHeartbeat()
Schedules asynchronous heartbeat to a peer if it is not scheduled.
Definition: ha_service.cc:1774
void passiveBackupStateHandler()
Handler for "passive-backup" state.
Definition: ha_service.cc:572
QueryFilter query_filter_
Selects queries to be processed/dropped.
Definition: ha_service.h:1169
static const int HA_MAINTENANCE_NOTIFY_EVT
ha-maintenance-notify command received.
Definition: ha_service.h:59
static const int HA_SYNCED_PARTNER_UNAVAILABLE_EVT
The heartbeat command failed after receiving ha-sync-complete-notify command from the partner.
Definition: ha_service.h:69
void inMaintenanceStateHandler()
Handler for the "in-maintenance" state.
Definition: ha_service.cc:418
virtual void verifyEvents()
Verifies events used by the HA service.
Definition: ha_service.cc:144
void conditionalLogPausedState() const
Logs if the server is paused in the current state.
Definition: ha_service.cc:978
bool unpause()
Unpauses the HA state machine with logging.
Definition: ha_service.cc:968
static const int HA_CONTROL_RESULT_MAINTENANCE_NOT_ALLOWED
Control result returned in response to ha-maintenance-notify.
Definition: ha_service.h:72
void serveDefaultScopes()
Instructs the HA service to serve default scopes.
Definition: ha_service.cc:989
size_t asyncSendLeaseUpdates(const dhcp::Pkt4Ptr &query, const dhcp::Lease4CollectionPtr &leases, const dhcp::Lease4CollectionPtr &deleted_leases, const hooks::ParkingLotHandlePtr &parking_lot)
Schedules asynchronous IPv4 leases updates.
Definition: ha_service.cc:1131
static const int HA_SYNCING_SUCCEEDED_EVT
Lease database synchronization succeeded.
Definition: ha_service.h:56
bool sendHAReset()
Sends ha-reset command to partner synchronously.
Definition: ha_service.cc:2499
std::function< void(const bool, const std::string &, const int)> PostRequestCallback
Callback invoked when request was sent and a response received or an error occurred.
Definition: ha_service.h:82
virtual void defineEvents()
Defines events used by the HA service.
Definition: ha_service.cc:130
asiolink::IOServicePtr io_service_
Pointer to the IO service object shared between this hooks library and the DHCP server.
Definition: ha_service.h:1147
CommunicationStatePtr communication_state_
Holds communication state with a peer.
Definition: ha_service.h:1166
LeaseUpdateBacklog lease_update_backlog_
Backlog of DHCP lease updates.
Definition: ha_service.h:1283
virtual ~HAService()
Destructor.
Definition: ha_service.cc:122
static const int HA_SYNCING_FAILED_EVT
Lease database synchronization failed.
Definition: ha_service.h:53
static const int HA_MAINTENANCE_CANCEL_EVT
ha-maintenance-cancel command received.
Definition: ha_service.h:65
void readyStateHandler()
Handler for "ready" state.
Definition: ha_service.cc:590
virtual void defineStates()
Defines states of the HA service.
Definition: ha_service.cc:158
void backupStateHandler()
Handler for the "backup" state.
Definition: ha_service.cc:211
void communicationRecoveryHandler()
Handler for the "communication-recovery" state.
Definition: ha_service.cc:226
bool isPartnerStateInvalid() const
Indicates if the partner's state is invalid.
Definition: ha_service.cc:1101
int synchronize(std::string &status_message, const std::string &server_name, const unsigned int max_period)
Synchronizes lease database with a partner.
Definition: ha_service.cc:2208
void normalStateHandler()
Handler for the "hot-standby" and "load-balancing" states.
Definition: ha_service.cc:343
void waitingStateHandler()
Handler for "waiting" state.
Definition: ha_service.cc:779
bool shouldSendLeaseUpdates(const HAConfig::PeerConfigPtr &peer_config) const
Checks if the lease updates should be sent as result of leases allocation or release.
Definition: ha_service.cc:1442
static const int HA_LEASE_UPDATES_COMPLETE_EVT
Finished lease updates commands.
Definition: ha_service.h:50
void partnerDownStateHandler()
Handler for "partner-down" state.
Definition: ha_service.cc:443
http::HttpClientPtr client_
HTTP client instance used to send HA commands and lease updates.
Definition: ha_service.h:1159
void updatePendingRequest(QueryPtrType &query)
Update pending request counter for this query.
Definition: ha_service.cc:1294
bool shouldPartnerDown() const
Indicates if the server should transition to the partner down state.
Definition: ha_service.cc:1056
std::function< void(const bool, const std::string &, const bool)> PostSyncCallback
Callback invoked when lease database synchronization is complete.
Definition: ha_service.h:91
void syncingStateHandler()
Handler for "syncing" state.
Definition: ha_service.cc:669
void partnerInMaintenanceStateHandler()
Handler for "partner-in-maintenance" state.
Definition: ha_service.cc:533
bool push(const OpType op_type, const dhcp::LeasePtr &lease)
Appends lease update to the queue.
OpType
Type of the lease update (operation type).
void clear()
Removes all lease updates from the queue.
bool wasOverflown()
Checks if the queue was overflown.
bool inScope(const dhcp::Pkt4Ptr &query4, std::string &scope_class) const
Checks if this server should process the DHCPv4 query.
void serveFailoverScopes()
Enable scopes required in failover case.
void serveDefaultScopes()
Serve default scopes for the given HA mode.
void serveNoScopes()
Disables all scopes.
Represents HTTP Host header.
Definition: http_header.h:68
HTTP client class.
Definition: client.h:87
void asyncSendRequest(const Url &url, const asiolink::TlsContextPtr &tls_context, const HttpRequestPtr &request, const HttpResponsePtr &response, const RequestHandler &request_callback, const RequestTimeout &request_timeout=RequestTimeout(10000), const ConnectHandler &connect_callback=ConnectHandler(), const HandshakeHandler &handshake_callback=HandshakeHandler(), const CloseHandler &close_callback=CloseHandler())
Queues new asynchronous HTTP request for a given URL.
Definition: client.cc:1966
This class parses and generates time values used in HTTP.
Definition: date_time.h:41
std::string rfc1123Format() const
Returns time value formatted as specified in RFC 1123.
Definition: date_time.cc:30
const EventPtr & getEvent(unsigned int value)
Fetches the event referred to by value.
Definition: state_model.cc:186
std::string getStateLabel(const int state) const
Fetches the label associated with an state value.
Definition: state_model.cc:421
void unpauseModel()
Unpauses state model.
Definition: state_model.cc:276
bool isModelPaused() const
Returns whether or not the model is paused.
Definition: state_model.cc:415
void postNextEvent(unsigned int event)
Sets the next event to the given event value.
Definition: state_model.cc:320
void defineState(unsigned int value, const std::string &label, StateHandler handler, const StatePausing &state_pausing=STATE_PAUSE_NEVER)
Adds an state value and associated label to the set of states.
Definition: state_model.cc:196
bool doOnExit()
Checks if on exit flag is true.
Definition: state_model.cc:347
unsigned int getNextEvent() const
Fetches the model's next event.
Definition: state_model.cc:373
void defineEvent(unsigned int value, const std::string &label)
Adds an event value and associated label to the set of events.
Definition: state_model.cc:170
void transition(unsigned int state, unsigned int event)
Sets up the model to transition into given state with a given event.
Definition: state_model.cc:264
bool doOnEntry()
Checks if on entry flag is true.
Definition: state_model.cc:339
static const int NOP_EVT
Signifies that no event has occurred.
Definition: state_model.h:292
void startModel(const int start_state)
Begins execution of the model.
Definition: state_model.cc:100
unsigned int getLastEvent() const
Fetches the model's last event.
Definition: state_model.cc:367
unsigned int getCurrState() const
Fetches the model's current state.
Definition: state_model.cc:355
Utility class to measure code execution times.
Definition: stopwatch.h:35
void stop()
Stops the stopwatch.
Definition: stopwatch.cc:35
std::string logFormatLastDuration() const
Returns the last measured duration in the format directly usable in log messages.
Definition: stopwatch.cc:75
This file contains several functions and constants that are used for handling commands and responses ...
#define isc_throw(type, stream)
A shortcut macro to insert known values into exception arguments.
An abstract API for lease database.
#define LOG_ERROR(LOGGER, MESSAGE)
Macro to conveniently test error output and log it.
Definition: macros.h:32
#define LOG_INFO(LOGGER, MESSAGE)
Macro to conveniently test info output and log it.
Definition: macros.h:20
#define LOG_WARN(LOGGER, MESSAGE)
Macro to conveniently test warn output and log it.
Definition: macros.h:26
const int CONTROL_RESULT_EMPTY
Status code indicating that the specified command was completed correctly, but failed to produce any ...
const char * CONTROL_TEXT
String used for storing textual description ("text")
constexpr long TIMEOUT_DEFAULT_HTTP_CLIENT_REQUEST
Timeout for the HTTP clients awaiting a response to a request.
Definition: timeouts.h:38
const int CONTROL_RESULT_ERROR
Status code indicating a general failure.
const int CONTROL_RESULT_COMMAND_UNSUPPORTED
Status code indicating that the specified command is not supported.
ConstElementPtr createAnswer(const int status_code, const std::string &text, const ConstElementPtr &arg)
ConstElementPtr parseAnswer(int &rcode, const ConstElementPtr &msg)
const char * CONTROL_RESULT
String used for result, i.e. integer status ("result")
const int CONTROL_RESULT_SUCCESS
Status code indicating a successful operation.
boost::shared_ptr< const Element > ConstElementPtr
Definition: data.h:27
boost::shared_ptr< Element > ElementPtr
Definition: data.h:24
boost::shared_ptr< isc::dhcp::Pkt > PktPtr
A pointer to either Pkt4 or Pkt6 packet.
Definition: pkt.h:797
std::string ClientClass
Defines a single class name.
Definition: classify.h:37
boost::shared_ptr< Lease4Collection > Lease4CollectionPtr
A shared pointer to the collection of IPv4 leases.
Definition: lease.h:490
boost::shared_ptr< Pkt4 > Pkt4Ptr
A pointer to Pkt4 object.
Definition: pkt4.h:544
boost::shared_ptr< Lease > LeasePtr
Pointer to the lease object.
Definition: lease.h:26
boost::shared_ptr< NetworkState > NetworkStatePtr
Pointer to the NetworkState object.
boost::shared_ptr< Lease6Collection > Lease6CollectionPtr
A shared pointer to the collection of IPv6 leases.
Definition: lease.h:644
boost::shared_ptr< Pkt6 > Pkt6Ptr
A pointer to Pkt6 packet.
Definition: pkt6.h:28
boost::shared_ptr< Lease4 > Lease4Ptr
Pointer to a Lease4 structure.
Definition: lease.h:283
const isc::log::MessageID HA_INVALID_PARTNER_STATE_LOAD_BALANCING
Definition: ha_messages.h:51
const isc::log::MessageID HA_RESUME_CLIENT_LISTENER_FAILED
Definition: ha_messages.h:93
const isc::log::MessageID HA_LOCAL_DHCP_ENABLE
Definition: ha_messages.h:76
const isc::log::MessageID HA_LEASES_BACKLOG_NOTHING_TO_SEND
Definition: ha_messages.h:58
const isc::log::MessageID HA_LEASES_BACKLOG_FAILED
Definition: ha_messages.h:57
const isc::log::MessageID HA_SYNC_FAILED
Definition: ha_messages.h:103
const isc::log::MessageID HA_TERMINATED_RESTART_PARTNER
Definition: ha_messages.h:108
const int HA_PASSIVE_BACKUP_ST
In passive-backup state with a single active server and backup servers.
const int HA_HOT_STANDBY_ST
Hot standby state.
const isc::log::MessageID HA_INVALID_PARTNER_STATE_COMMUNICATION_RECOVERY
Definition: ha_messages.h:49
const isc::log::MessageID HA_LEASES_BACKLOG_SUCCESS
Definition: ha_messages.h:60
const int HA_COMMUNICATION_RECOVERY_ST
Communication recovery state.
const isc::log::MessageID HA_STATE_MACHINE_CONTINUED
Definition: ha_messages.h:96
isc::log::Logger ha_logger("ha-hooks")
Definition: ha_log.h:17
const isc::log::MessageID HA_LEASES_SYNC_FAILED
Definition: ha_messages.h:62
const isc::log::MessageID HA_SYNC_SUCCESSFUL
Definition: ha_messages.h:106
const int HA_UNAVAILABLE_ST
Special state indicating that this server is unable to communicate with the partner.
const isc::log::MessageID HA_CONFIG_LEASE_UPDATES_DISABLED_REMINDER
Definition: ha_messages.h:33
const isc::log::MessageID HA_SERVICE_STARTED
Definition: ha_messages.h:95
const int HA_TERMINATED_ST
HA service terminated state.
const int HA_IN_MAINTENANCE_ST
In maintenance state.
const int HA_LOAD_BALANCING_ST
Load balancing state.
const isc::log::MessageID HA_DHCP_ENABLE_FAILED
Definition: ha_messages.h:42
const isc::log::MessageID HA_LEASE_UPDATE_DELETE_FAILED_ON_PEER
Definition: ha_messages.h:71
const isc::log::MessageID HA_LEASES_BACKLOG_START
Definition: ha_messages.h:59
const isc::log::MessageID HA_SYNC_START
Definition: ha_messages.h:105
const isc::log::MessageID HA_HEARTBEAT_FAILED
Definition: ha_messages.h:44
const int HA_PARTNER_DOWN_ST
Partner down state.
const isc::log::MessageID HA_LEASE_UPDATES_ENABLED
Definition: ha_messages.h:68
const isc::log::MessageID HA_INVALID_PARTNER_STATE_HOT_STANDBY
Definition: ha_messages.h:50
const isc::log::MessageID HA_STATE_MACHINE_PAUSED
Definition: ha_messages.h:97
const isc::log::MessageID HA_TERMINATED
Definition: ha_messages.h:107
const isc::log::MessageID HA_DHCP_DISABLE_FAILED
Definition: ha_messages.h:40
boost::shared_ptr< HAConfig > HAConfigPtr
Pointer to the High Availability configuration structure.
Definition: ha_config.h:760
const isc::log::MessageID HA_MAINTENANCE_STARTED_IN_PARTNER_DOWN
Definition: ha_messages.h:85
const int HA_PARTNER_IN_MAINTENANCE_ST
Partner in-maintenance state.
const isc::log::MessageID HA_MAINTENANCE_NOTIFY_FAILED
Definition: ha_messages.h:81
const int HA_WAITING_ST
Server waiting state, i.e. waiting for another server to be ready.
HAServerType
Lists possible server types for which HA service is created.
const int HA_BACKUP_ST
Backup state.
const isc::log::MessageID HA_PAUSE_CLIENT_LISTENER_ILLEGAL
Definition: ha_messages.h:89
const isc::log::MessageID HA_PAUSE_CLIENT_LISTENER_FAILED
Definition: ha_messages.h:88
const isc::log::MessageID HA_MAINTENANCE_SHUTDOWN_SAFE
Definition: ha_messages.h:83
const isc::log::MessageID HA_MAINTENANCE_NOTIFY_CANCEL_FAILED
Definition: ha_messages.h:79
const isc::log::MessageID HA_LEASE_UPDATES_DISABLED
Definition: ha_messages.h:67
const isc::log::MessageID HA_LOCAL_DHCP_DISABLE
Definition: ha_messages.h:75
const int HA_SYNCING_ST
Synchronizing database state.
const isc::log::MessageID HA_RESET_FAILED
Definition: ha_messages.h:91
const isc::log::MessageID HA_STATE_TRANSITION
Definition: ha_messages.h:98
const isc::log::MessageID HA_CONFIG_LEASE_SYNCING_DISABLED_REMINDER
Definition: ha_messages.h:30
std::string stateToString(int state)
Returns state name.
const int HA_READY_ST
Server ready state, i.e. synchronized database, can enable DHCP service.
const isc::log::MessageID HA_SYNC_COMPLETE_NOTIFY_FAILED
Definition: ha_messages.h:101
const isc::log::MessageID HA_COMMUNICATION_INTERRUPTED
Definition: ha_messages.h:20
const isc::log::MessageID HA_MAINTENANCE_STARTED
Definition: ha_messages.h:84
const isc::log::MessageID HA_LEASE_UPDATE_CREATE_UPDATE_FAILED_ON_PEER
Definition: ha_messages.h:70
const isc::log::MessageID HA_LEASE_UPDATE_FAILED
Definition: ha_messages.h:72
const isc::log::MessageID HA_STATE_TRANSITION_PASSIVE_BACKUP
Definition: ha_messages.h:99
boost::shared_ptr< ParkingLotHandle > ParkingLotHandlePtr
Pointer to the parking lot handle.
Definition: parking_lots.h:381
boost::shared_ptr< PostHttpRequestJson > PostHttpRequestJsonPtr
Pointer to PostHttpRequestJson.
boost::shared_ptr< HttpResponseJson > HttpResponseJsonPtr
Pointer to the HttpResponseJson object.
Definition: response_json.h:24
boost::shared_ptr< HttpResponse > HttpResponsePtr
Pointer to the HttpResponse object.
Definition: response.h:78
const char * MessageID
Definition: message_types.h:15
Definition: edns.h:19
Defines the logger used by the top-level component of kea-lfc.
HTTP request/response timeout value.
Definition: client.h:90