-
Notifications
You must be signed in to change notification settings - Fork 1.7k
Expand file tree
/
Copy pathflags.cpp
More file actions
1569 lines (1407 loc) · 63.4 KB
/
flags.cpp
File metadata and controls
1569 lines (1407 loc) · 63.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "slave/flags.hpp"
#include <stout/error.hpp>
#include <stout/flags.hpp>
#include <stout/json.hpp>
#include <stout/option.hpp>
#include <stout/os.hpp>
#include <stout/path.hpp>
#include <mesos/type_utils.hpp>
#include "common/http.hpp"
#include "common/parse.hpp"
#include "common/protobuf_utils.hpp"
#include "slave/constants.hpp"
#ifdef __linux__
#include "slave/containerizer/mesos/linux_launcher.hpp"
#endif // __linux__
#include "slave/containerizer/mesos/provisioner/constants.hpp"
using std::string;
mesos::internal::slave::Flags::Flags()
{
add(&Flags::hostname,
"hostname",
"The hostname the agent should report.\n"
"If left unset, the hostname is resolved from the IP address\n"
"that the agent advertises; unless the user explicitly prevents\n"
"that, using `--no-hostname_lookup`, in which case the IP itself\n"
"is used.");
add(&Flags::hostname_lookup,
"hostname_lookup",
"Whether we should execute a lookup to find out the server's hostname,\n"
"if not explicitly set (via, e.g., `--hostname`).\n"
"True by default; if set to `false` it will cause Mesos\n"
"to use the IP address, unless the hostname is explicitly set.",
true);
add(&Flags::version,
"version",
"Show version and exit.",
false);
// TODO(benh): Is there a way to specify units for the resources?
add(&Flags::resources,
"resources",
"Total consumable resources per agent. Can be provided in JSON format\n"
"or as a semicolon-delimited list of key:value pairs, with the role\n"
"optionally specified.\n"
"\n"
"As a key:value list:\n"
"`name(role):value;name:value...`\n"
"\n"
"To use JSON, pass a JSON-formatted string or use\n"
"`--resources=filepath` to specify the resources via a file containing\n"
"a JSON-formatted string. `filepath` can only be of the form\n"
"`file:///path/to/file`.\n"
"\n"
"Example JSON:\n"
"[\n"
" {\n"
" \"name\": \"cpus\",\n"
" \"type\": \"SCALAR\",\n"
" \"scalar\": {\n"
" \"value\": 24\n"
" }\n"
" },\n"
" {\n"
" \"name\": \"mem\",\n"
" \"type\": \"SCALAR\",\n"
" \"scalar\": {\n"
" \"value\": 24576\n"
" }\n"
" }\n"
"]");
add(&Flags::resource_provider_config_dir,
"resource_provider_config_dir",
"Path to a directory that contains local resource provider configs.\n"
"Each file in the config dir should contain a JSON object representing\n"
"a `ResourceProviderInfo` object. Each local resource provider provides\n"
"resources that are local to the agent. It is also responsible for\n"
"handling operations on the resources it provides. Please note that\n"
"`resources` field might not need to be specified if the resource\n"
"provider determines the resources automatically.\n"
"\n"
"Example config file in this directory:\n"
"{\n"
" \"type\": \"org.mesos.apache.rp.local.storage\",\n"
" \"name\": \"lvm\"\n"
"}");
add(&Flags::disk_profile_adaptor,
"disk_profile_adaptor",
"The name of the disk profile adaptor module that storage resource\n"
"providers should use for translating a 'disk profile' into inputs\n"
"consumed by various Container Storage Interface (CSI) plugins.\n"
"If this flag is not specified, the default behavior for storage\n"
"resource providers is to only expose resources for pre-existing\n"
"volumes and not publish RAW volumes.");
add(&Flags::isolation,
"isolation",
"Isolation mechanisms to use, e.g., `posix/cpu,posix/mem` (or\n"
"`windows/cpu,windows/mem` if you are on Windows), or\n"
"`cgroups/cpu,cgroups/mem`, or `network/port_mapping`\n"
"(configure with flag: `--with-network-isolator` to enable),\n"
"or `gpu/nvidia` for nvidia specific gpu isolation,\n"
"or load an alternate isolator module using the `--modules`\n"
"flag. if `cgroups/all` is specified, any other cgroups related\n"
"isolation options (e.g., `cgroups/cpu`) will be ignored, and all\n"
"the local enabled cgroups subsystems on the agent host will be\n"
"automatically loaded by the cgroups isolator. Note that this flag\n"
"is only relevant for the Mesos Containerizer.",
#ifndef __WINDOWS__
"posix/cpu,posix/mem"
#else
"windows/cpu,windows/mem"
#endif // __WINDOWS__
);
add(&Flags::launcher,
"launcher",
"The launcher to be used for Mesos containerizer. It could either be\n"
"`linux` or `posix`. The Linux launcher is required for any isolator\n"
"that requires Linux namespaces such as network, pid, etc.\n"
"If unspecified, the agent will choose the Linux launcher\n"
"if it's running as root and freezer subsystem is enabled on Linux.",
#ifdef __linux__
LinuxLauncher::available() ? "linux" : "posix"
#elif defined(__WINDOWS__)
"windows"
#else
"posix"
#endif // __linux__
);
add(&Flags::image_providers,
"image_providers",
"Comma-separated list of supported image providers,\n"
"e.g., `APPC,DOCKER`.");
add(&Flags::image_provisioner_backend,
"image_provisioner_backend",
"Strategy for provisioning container rootfs from images,\n"
"e.g., `aufs`, `bind`, `copy`, `overlay`.");
add(&Flags::image_gc_config,
"image_gc_config",
"JSON-formatted configuration for automatic container image garbage\n"
"collection. This is an optional flag. If it is not set, it means\n"
"the automatic container image gc is not enabled. Users have to\n"
"trigger image gc manually via the operator API. If it is set, the\n"
"auto image gc is enabled. This image gc config can be provided either\n"
"as a path pointing to a local file, or as a JSON-formatted string.\n"
"Please note that the image garbage collection only work with Mesos\n"
"Containerizer for now.\n"
"\n"
"See the ImageGcConfig message in `flags.proto` for the expected\n"
"format.\n"
"\n"
"In the following example, image garbage collection is configured to\n"
"sample disk usage every hour, and will attempt to maintain at least\n"
"10 percent of free space on the container image filesystem:\n"
"{\n"
" \"image_disk_headroom\": 0.1,\n"
" \"image_disk_watch_interval\": {\n"
" \"nanoseconds\": 3600000000000\n"
" },\n"
" \"excluded_images\": []\n"
"}");
add(&Flags::appc_simple_discovery_uri_prefix,
"appc_simple_discovery_uri_prefix",
"URI prefix to be used for simple discovery of appc images,\n"
"e.g., `http://`, `https://`, `hdfs://<hostname>:9000/user/abc/cde`.",
"http://");
add(&Flags::appc_store_dir,
"appc_store_dir",
"Directory the appc provisioner will store images in.\n",
path::join(os::temp(), "mesos", "store", "appc"));
add(&Flags::docker_registry,
"docker_registry",
"The default url for Mesos containerizer to pull Docker images. It\n"
"could either be a Docker registry server url (e.g., `https://registry.docker.io`),\n" // NOLINT(whitespace/line_length)
"or a source that Docker image archives (result of `docker save`) are\n"
"stored. The Docker archive source could be specified either as a local\n"
"path (e.g., `/tmp/docker/images`), or as an HDFS URI (*experimental*)\n"
"(e.g., `hdfs://localhost:8020/archives/`). Note that this option won't\n"
"change the default registry server for Docker containerizer.",
"https://registry-1.docker.io");
add(&Flags::docker_store_dir,
"docker_store_dir",
"Directory the Docker provisioner will store images in",
path::join(os::temp(), "mesos", "store", "docker"));
add(&Flags::docker_volume_checkpoint_dir,
"docker_volume_checkpoint_dir",
"The root directory where we checkpoint the information about docker\n"
"volumes that each container uses.",
"/var/run/mesos/isolators/docker/volume");
add(&Flags::docker_ignore_runtime,
"docker_ignore_runtime",
"Ignore any runtime configuration specified in the Docker image. The\n"
"Mesos containerizer will not propagate Docker runtime specifications\n"
"such as `WORKDIR`, `ENV` and `CMD` to the container.\n",
false);
add(&Flags::default_role,
"default_role",
"Any resources in the `--resources` flag that\n"
"omit a role, as well as any resources that\n"
"are not present in `--resources` but that are\n"
"automatically detected, will be assigned to\n"
"this role.",
"*");
add(&Flags::attributes,
"attributes",
"Attributes of the agent machine, in the form:\n"
"`rack:2` or `rack:2;u:1`");
add(&Flags::fetcher_cache_size,
"fetcher_cache_size",
"Size of the fetcher cache in Bytes.",
DEFAULT_FETCHER_CACHE_SIZE);
add(&Flags::fetcher_cache_dir,
"fetcher_cache_dir",
"Directory for the fetcher cache. The agent will clear this directory\n"
"on startup. It is recommended to set this value to a separate volume\n"
"for several reasons:\n"
" * The cache directories are transient and not meant to be\n"
" backed up. Upon restarting the agent, the cache is always empty.\n"
" * The cache and container sandboxes can potentially interfere with\n"
" each other when occupying a shared space (i.e. disk contention).",
path::join(os::temp(), "mesos", "fetch"));
add(&Flags::fetcher_stall_timeout,
"fetcher_stall_timeout",
"Amount of time for the fetcher to wait before considering a download\n"
"being too slow and abort it when the download stalls (i.e., the speed\n"
"keeps below one byte per second).\n"
"NOTE: This feature only applies when downloading data from the net and\n"
"does not apply to HDFS.",
DEFAULT_FETCHER_STALL_TIMEOUT);
add(&Flags::work_dir,
"work_dir",
"Path of the agent work directory. This is where executor sandboxes\n"
"will be placed, as well as the agent's checkpointed state in case of\n"
"failover. Note that locations like `/tmp` which are cleaned\n"
"automatically are not suitable for the work directory when running in\n"
"production, since long-running agents could lose data when cleanup\n"
"occurs. (Example: `/var/lib/mesos/agent`)");
// Registers `--runtime_dir`. The default value is produced by the lambda
// below, which is invoked immediately as part of this `add` call.
add(&Flags::runtime_dir,
    "runtime_dir",
    "Path of the agent runtime directory. This is where runtime data\n"
    "is stored by an agent that it needs to persist across crashes (but\n"
    "not across reboots). This directory will be cleared on reboot.\n"
    "(Example: `/var/run/mesos`)",
    []() -> string {
      // Candidate parent directory derived from `os::var()`. It is only
      // used when it turns out to be both readable and writable.
      string parent;
      bool parentUsable = false;

      Try<string> varDir = os::var();
      if (varDir.isSome()) {
#ifdef __WINDOWS__
        parent = varDir.get();
#else
        parent = path::join(varDir.get(), "run");
#endif // __WINDOWS__

        // Only the parent is probed for access: the remainder of the
        // directory structure is created by the agent later.
        Try<bool> probe = os::access(parent, R_OK | W_OK);
        parentUsable = probe.isSome() && probe.get();
      }

      if (!parentUsable) {
        // Fallback path for ease of use in case `os::var()` errors or
        // the parent directory is not accessible.
        return path::join(os::temp(), "mesos", "runtime");
      }

#ifdef __WINDOWS__
      return path::join(parent, "mesos", "runtime");
#else
      return path::join(parent, "mesos");
#endif // __WINDOWS__
    }());
add(&Flags::launcher_dir, // TODO(benh): This needs a better name.
"launcher_dir",
"Directory path of Mesos binaries. Mesos looks for the fetcher,\n"
"containerizer, and executor binary files under this directory.",
PKGLIBEXECDIR);
add(&Flags::hadoop_home,
"hadoop_home",
"Path to find Hadoop installed (for\n"
"fetching framework executors from HDFS)\n"
"(no default, look for `HADOOP_HOME` in\n"
"environment or find hadoop on `PATH`)");
#ifndef __WINDOWS__
add(&Flags::switch_user,
"switch_user",
"If set to `true`, the agent will attempt to run tasks as\n"
"the `user` who submitted them (as defined in `FrameworkInfo`)\n"
"(this requires `setuid` permission and that the given `user`\n"
"exists on the agent).\n"
"If the user does not exist, an error occurs and the task will fail.\n"
"If set to `false`, tasks will be run as the same user as the Mesos\n"
"agent process.\n"
"NOTE: This feature is not yet supported on Windows agent, and\n"
"therefore the flag currently does not exist on that platform.",
true);
add(&Flags::volume_gid_range,
"volume_gid_range",
"When this flag is specified, if a task running as non-root user uses a\n"
"shared persistent volume or a PARENT type SANDBOX_PATH volume, the\n"
"volume will be owned by a gid allocated from this range and have the\n"
"`setgid` bit set, and the task process will be launched with the gid\n"
"as its supplementary group to make sure it can access the volume.\n"
"(Example: `[10000-20000]`)");
#endif // __WINDOWS__
add(&Flags::http_heartbeat_interval,
"http_heartbeat_interval",
"This flag sets a heartbeat interval (e.g. '5secs', '10mins') for\n"
"messages to be sent over persistent connections made against\n"
"the agent HTTP API. Currently, this only applies to the\n"
"'LAUNCH_NESTED_CONTAINER_SESSION' and 'ATTACH_CONTAINER_OUTPUT' calls.",
Seconds(30));
add(&Flags::frameworks_home,
"frameworks_home",
"Directory path prepended to relative executor URIs", "");
add(&Flags::registration_backoff_factor,
"registration_backoff_factor",
"Agent initially picks a random amount of time between `[0, b]`, where\n"
"`b = registration_backoff_factor`, to (re-)register with a new master.\n"
"Subsequent retries are exponentially backed off based on this\n"
"interval (e.g., 1st retry uses a random value between `[0, b * 2^1]`,\n"
"2nd retry between `[0, b * 2^2]`, 3rd retry between `[0, b * 2^3]`,\n"
"etc) up to a maximum of " + stringify(REGISTER_RETRY_INTERVAL_MAX),
DEFAULT_REGISTRATION_BACKOFF_FACTOR);
add(&Flags::authentication_backoff_factor,
"authentication_backoff_factor",
"The agent will time out its authentication with the master based on\n"
"exponential backoff. The timeout will be randomly chosen within the\n"
"range `[min, min + factor*2^n]` where `n` is the number of failed\n"
"attempts. To tune these parameters, set the\n"
"`--authentication_timeout_[min|max|factor]` flags.\n",
DEFAULT_AUTHENTICATION_BACKOFF_FACTOR);
add(&Flags::authentication_timeout_min,
"authentication_timeout_min",
"The minimum amount of time the agent waits before retrying\n"
"authenticating with the master. See `authentication_backoff_factor`\n"
"for more details. NOTE that since authentication retry cancels the\n"
"previous authentication request, one should consider what is the\n"
"normal authentication delay when setting this flag to prevent\n"
"premature retry.",
DEFAULT_AUTHENTICATION_TIMEOUT_MIN);
add(&Flags::authentication_timeout_max,
"authentication_timeout_max",
"The maximum amount of time the agent waits before retrying\n"
"authenticating with the master. See `authentication_backoff_factor`\n"
"for more details.",
DEFAULT_AUTHENTICATION_TIMEOUT_MAX);
// Registers `--executor_environment_variables` (no default). The trailing
// lambda validates the parsed value: an unset flag is accepted, and a set
// flag is accepted only if every value in the JSON object is a string.
add(&Flags::executor_environment_variables,
    "executor_environment_variables",
    "JSON object representing the environment variables that should be\n"
    "passed to the executor, and thus subsequently task(s). By default this\n"
    "flag is none. Users have to define executor environment explicitly.\n"
    "Example:\n"
    "{\n"
    " \"PATH\": \"/bin:/usr/bin\",\n"
    " \"LD_LIBRARY_PATH\": \"/usr/local/lib\"\n"
    "}",
    [](const Option<JSON::Object>& object) -> Option<Error> {
      // Nothing to validate when the flag was not provided.
      if (object.isNone()) {
        return None();
      }

      foreachvalue (const JSON::Value& entry, object->values) {
        const bool isString = entry.is<JSON::String>();
        if (!isString) {
          return Error("`executor_environment_variables` must "
                       "only contain string values");
        }
      }

      return None();
    });
add(&Flags::executor_registration_timeout,
"executor_registration_timeout",
"Amount of time to wait for an executor\n"
"to register with the agent before considering it hung and\n"
"shutting it down (e.g., 60secs, 3mins, etc)",
EXECUTOR_REGISTRATION_TIMEOUT);
// Registers `--executor_reregistration_timeout`, defaulting to
// `EXECUTOR_REREGISTRATION_TIMEOUT`. The validator rejects any value
// greater than `MAX_EXECUTOR_REREGISTRATION_TIMEOUT`.
add(&Flags::executor_reregistration_timeout,
    "executor_reregistration_timeout",
    "The timeout within which an executor is expected to reregister after\n"
    "the agent has restarted, before the agent considers it gone and shuts\n"
    "it down. Note that currently, the agent will not reregister with the\n"
    "master until this timeout has elapsed (see MESOS-7539).",
    EXECUTOR_REREGISTRATION_TIMEOUT,
    [](const Duration& value) -> Option<Error> {
      // Values up to and including the maximum are accepted as-is.
      if (value <= MAX_EXECUTOR_REREGISTRATION_TIMEOUT) {
        return None();
      }

      const string limit = stringify(MAX_EXECUTOR_REREGISTRATION_TIMEOUT);
      return Error(
          "Expected `--executor_reregistration_timeout` "
          "to be not more than " + limit);
    });
// TODO(bmahler): Remove this once v0 executors are no longer supported.
add(&Flags::executor_reregistration_retry_interval,
"executor_reregistration_retry_interval",
"For PID-based executors, how long the agent waits before retrying\n"
"the reconnect message sent to the executor during recovery.\n"
"NOTE: Do not use this unless you understand the following\n"
"(see MESOS-5332): PID-based executors using Mesos libraries >= 1.1.2\n"
"always re-link with the agent upon receiving the reconnect message.\n"
"This avoids the executor replying on a half-open TCP connection to\n"
"the old agent (possible if netfilter is dropping packets,\n"
"see: MESOS-7057). However, PID-based executors using Mesos\n"
"libraries < 1.1.2 do not re-link and are therefore prone to\n"
"replying on a half-open connection after the agent restarts. If we\n"
"only send a single reconnect message, these \"old\" executors will\n"
"reply on their half-open connection and receive a RST; without any\n"
"retries, they will fail to reconnect and be killed by the agent once\n"
"the executor re-registration timeout elapses. To ensure these \"old\"\n"
"executors can reconnect in the presence of netfilter dropping\n"
"packets, we introduced optional retries of the reconnect message.\n"
"This results in \"old\" executors correctly establishing a link\n"
"when processing the second reconnect message.");
add(&Flags::executor_shutdown_grace_period,
"executor_shutdown_grace_period",
"Default amount of time to wait for an executor to shut down\n"
"(e.g. 60secs, 3mins, etc). ExecutorInfo.shutdown_grace_period\n"
"overrides this default. Note that the executor must not assume\n"
"that it will always be allotted the full grace period, as the\n"
"agent may decide to allot a shorter period, and failures / forcible\n"
"terminations may occur.",
DEFAULT_EXECUTOR_SHUTDOWN_GRACE_PERIOD);
#ifdef USE_SSL_SOCKET
add(&Flags::jwt_secret_key,
"jwt_secret_key",
flags::DeprecatedName("executor_secret_key"),
"Path to a file containing the key used when generating JWT secrets.\n"
"This flag is only available when Mesos is built with SSL support.");
#endif // USE_SSL_SOCKET
add(&Flags::gc_delay,
"gc_delay",
"Maximum amount of time to wait before cleaning up\n"
"executor directories (e.g., 3days, 2weeks, etc).\n"
"Note that this delay may be shorter depending on\n"
"the available disk usage.",
GC_DELAY);
add(&Flags::gc_disk_headroom,
"gc_disk_headroom",
"Adjust disk headroom used to calculate maximum executor\n"
"directory age. Age is calculated by:\n"
"`gc_delay * max(0.0, (1.0 - gc_disk_headroom - disk usage))`\n"
"every `--disk_watch_interval` duration. `gc_disk_headroom` must\n"
"be a value between 0.0 and 1.0",
GC_DISK_HEADROOM);
add(&Flags::gc_non_executor_container_sandboxes,
"gc_non_executor_container_sandboxes",
"Determines whether nested container sandboxes created via the\n"
"LAUNCH_CONTAINER and LAUNCH_NESTED_CONTAINER APIs will be\n"
"automatically garbage collected by the agent upon termination.\n"
"The REMOVE_(NESTED_)CONTAINER API is unaffected by this flag\n"
"and can still be used.",
false);
add(&Flags::disk_watch_interval,
"disk_watch_interval",
"Periodic time interval (e.g., 10secs, 2mins, etc)\n"
"to check the overall disk usage managed by the agent.\n"
"This drives the garbage collection of archived\n"
"information and sandboxes.",
DISK_WATCH_INTERVAL);
add(&Flags::container_logger,
"container_logger",
"The name of the container logger to use for logging container\n"
"(i.e., executor and task) stdout and stderr. The default\n"
"container logger writes to `stdout` and `stderr` files\n"
"in the sandbox directory.");
add(&Flags::recover,
"recover",
"Whether to recover status updates and reconnect with old executors.\n"
"Valid values for `recover` are\n"
"reconnect: Reconnect with any old live executors.\n"
"cleanup : Kill any old live executors and exit.\n"
" Use this option when doing an incompatible agent\n"
" or executor upgrade.",
"reconnect");
add(&Flags::recovery_timeout,
"recovery_timeout",
"Amount of time allotted for the agent to recover. If the agent takes\n"
"longer than recovery_timeout to recover, any executors that are\n"
"waiting to reconnect to the agent will self-terminate.\n"
"The best value of this flag depends on the frameworks being run.\n"
"For non-partition-aware frameworks, it makes sense to set this\n"
"close to the `agent_reregister_timeout` on the master.\n"
"For partition-aware frameworks, it makes sense to set this higher\n"
"than the timeout that the framework uses to give up on the task,\n"
"otherwise the executor might terminate even if the task could still\n"
"successfully reconnect to the framework.",
RECOVERY_TIMEOUT);
add(&Flags::reconfiguration_policy,
"reconfiguration_policy",
"This flag controls which agent configuration changes are considered\n"
"acceptable when recovering the previous agent state. Possible values:\n"
"equal: The old and the new state must match exactly.\n"
"additive: The new state must be a superset of the old state:\n"
" it is permitted to add additional resources, attributes\n"
" and domains but not to remove or to modify existing ones.\n"
"Note that this only affects the checking done on the agent itself,\n"
"the master may still reject the agent if it detects a change that it\n"
"considers unacceptable, which, e.g., currently happens when port or\n"
"hostname are changed.",
"equal");
add(&Flags::strict,
"strict",
"If `strict=true`, any and all recovery errors are considered fatal.\n"
"If `strict=false`, any expected errors (e.g., agent cannot recover\n"
"information about an executor, because the agent died right before\n"
"the executor registered.) during recovery are ignored and as much\n"
"state as possible is recovered.\n",
true);
add(&Flags::max_completed_executors_per_framework,
"max_completed_executors_per_framework",
"Maximum number of completed executors per framework to store\n"
"in memory.\n",
DEFAULT_MAX_COMPLETED_EXECUTORS_PER_FRAMEWORK);
#ifdef __linux__
add(&Flags::cgroups_destroy_timeout,
"cgroups_destroy_timeout",
"Amount of time allowed to destroy a cgroup hierarchy. If the cgroup\n"
"hierarchy is not destroyed within the timeout, the corresponding\n"
"container destroy is considered failed.",
Seconds(60));
add(&Flags::cgroups_hierarchy,
"cgroups_hierarchy",
"The path to the cgroups hierarchy root\n", "/sys/fs/cgroup");
add(&Flags::cgroups_root,
"cgroups_root",
"Name of the root cgroup\n",
"mesos");
add(&Flags::cgroups_enable_cfs,
"cgroups_enable_cfs",
"Cgroups feature flag to enable hard limits on CPU resources\n"
"via the CFS bandwidth limiting subfeature.\n",
false);
// TODO(antonl): Set default to true in future releases.
add(&Flags::cgroups_limit_swap,
"cgroups_limit_swap",
"Cgroups feature flag to enable memory limits on both memory and\n"
"swap instead of just memory.\n",
false);
add(&Flags::cgroups_cpu_enable_pids_and_tids_count,
"cgroups_cpu_enable_pids_and_tids_count",
"Cgroups feature flag to enable counting of processes and threads\n"
"inside a container.\n",
false);
add(&Flags::cgroups_net_cls_primary_handle,
"cgroups_net_cls_primary_handle",
"A non-zero, 16-bit handle of the form `0xAAAA`. This will be \n"
"used as the primary handle for the net_cls cgroup.");
// Registers `--cgroups_net_cls_secondary_handles` (no default): the range
// of secondary handles usable together with the primary net_cls handle.
// The help text previously left the backtick around
// `--cgroups_net_cls_primary_handle` unterminated.
add(&Flags::cgroups_net_cls_secondary_handles,
    "cgroups_net_cls_secondary_handles",
    "A range of the form 0xAAAA,0xBBBB, specifying the valid secondary\n"
    "handles that can be used with the primary handle. This will take\n"
    "effect only when the `--cgroups_net_cls_primary_handle` is set.");
// Registers `--allowed_devices` (no default): a JSON description of extra
// devices to whitelist in the cgroups devices subsystem, on top of the
// built-in list enumerated in the help text. The help text's ungrammatical
// "Noted that ... always be whitelisted" sentence has been corrected.
add(&Flags::allowed_devices,
    "allowed_devices",
    "JSON array representing the devices that will be additionally\n"
    "whitelisted by cgroups devices subsystem. Note that the following\n"
    "devices are always whitelisted by default:\n"
    " * /dev/console\n"
    " * /dev/tty0\n"
    " * /dev/tty1\n"
    " * /dev/pts/*\n"
    " * /dev/ptmx\n"
    " * /dev/net/tun\n"
    " * /dev/null\n"
    " * /dev/zero\n"
    " * /dev/full\n"
    " * /dev/tty\n"
    " * /dev/urandom\n"
    " * /dev/random\n"
    "This flag will take effect only when `cgroups/devices` is set in\n"
    "`--isolation` flag.\n"
    "Example:\n"
    "{\n"
    " \"allowed_devices\": [\n"
    " {\n"
    " \"device\": {\n"
    " \"path\": \"/path/to/device\"\n"
    " },\n"
    " \"access\": {\n"
    " \"read\": true,\n"
    " \"write\": false,\n"
    " \"mknod\": false\n"
    " }\n"
    " }\n"
    " ]\n"
    "}\n");
add(&Flags::agent_subsystems,
"agent_subsystems",
flags::DeprecatedName("slave_subsystems"),
"List of comma-separated cgroup subsystems to run the agent binary\n"
"in, e.g., `memory,cpuacct`. The default is none.\n"
"Present functionality is intended for resource monitoring and\n"
"no cgroup limits are set, they are inherited from the root mesos\n"
"cgroup.");
add(&Flags::host_path_volume_force_creation,
"host_path_volume_force_creation",
"A colon-separated list of directories where descendant directories\n"
"are allowed to be created by the `volume/host_path` isolator,\n"
"if the directories do not exist.");
add(&Flags::nvidia_gpu_devices,
"nvidia_gpu_devices",
"A comma-separated list of Nvidia GPU devices. When `gpus` is\n"
"specified in the `--resources` flag, this flag determines which GPU\n"
"devices will be made available. The devices should be listed as\n"
"numbers that correspond to Nvidia's NVML device enumeration (as\n"
"seen by running the command `nvidia-smi` on an Nvidia GPU\n"
"equipped system). The GPUs listed will only be isolated if the\n"
"`--isolation` flag contains the string `gpu/nvidia`.");
// Registers `--perf_events` (no default): the perf events sampled for each
// container when the perf_event isolator is in use. The help text
// previously said "command-separated" where "comma-separated" was meant.
add(&Flags::perf_events,
    "perf_events",
    "List of comma-separated perf events to sample for each container\n"
    "when using the perf_event isolator. Default is none.\n"
    "Run command `perf list` to see all events. Event names are\n"
    "sanitized by downcasing and replacing hyphens with underscores\n"
    "when reported in the PerfStatistics protobuf, e.g., `cpu-cycles`\n"
    "becomes `cpu_cycles`; see the PerfStatistics protobuf for all names.");
add(&Flags::perf_interval,
"perf_interval",
"Interval between the start of perf stat samples. Perf samples are\n"
"obtained periodically according to `perf_interval` and the most\n"
"recently obtained sample is returned rather than sampling on\n"
"demand. For this reason, `perf_interval` is independent of the\n"
"resource monitoring interval",
Seconds(60));
add(&Flags::perf_duration,
"perf_duration",
"Duration of a perf stat sample. The duration must be less\n"
"than the `perf_interval`.",
Seconds(10));
add(&Flags::revocable_cpu_low_priority,
"revocable_cpu_low_priority",
"Run containers with revocable CPU at a lower priority than\n"
"normal containers (non-revocable cpu). Currently only\n"
"supported by the cgroups/cpu isolator.",
true);
add(&Flags::systemd_enable_support,
"systemd_enable_support",
"Top level control of systemd support. When enabled, features such as\n"
"executor life-time extension are enabled unless there is an explicit\n"
"flag to disable these (see other flags). This should be enabled when\n"
"the agent is launched as a systemd unit.",
true);
add(&Flags::systemd_runtime_directory,
"systemd_runtime_directory",
"The path to the systemd system run time directory\n",
"/run/systemd/system");
add(&Flags::effective_capabilities,
"effective_capabilities",
flags::DeprecatedName("allowed_capabilities"),
"JSON representation of the Linux capabilities that the agent will\n"
"grant to a task that will be run in containers launched by the\n"
"containerizer (currently only supported by the Mesos Containerizer).\n"
"This set overrides the default capabilities for the user but not\n"
"the capabilities requested by the framework.\n"
"\n"
"To set capabilities the agent should have the `SETPCAP` capability.\n"
"\n"
"This flag is effective iff `linux/capabilities` isolation is enabled.\n"
"When `linux/capabilities` isolation is enabled, the absence of this\n"
"flag implies that the operator intends to allow ALL capabilities.\n"
"\n"
"Example:\n"
"{\n"
" \"capabilities\": [\n"
" \"NET_RAW\",\n"
" \"SYS_ADMIN\"\n"
" ]\n"
"}");
add(&Flags::bounding_capabilities,
"bounding_capabilities",
"JSON representation of the Linux capabilities that the operator\n"
"will allow as the maximum level of privilege that a task launched\n"
"by the containerizer may acquire (currently only supported by the\n"
"Mesos Containerizer).\n"
"\n"
"This flag is effective iff `linux/capabilities` isolation is enabled.\n"
"When `linux/capabilities` isolation is enabled, the absence of this\n"
"flag implies that the operator allows ALL capabilities.\n"
"\n"
"This flag has the same syntax as `--effective_capabilities`."
);
add(&Flags::disallow_sharing_agent_pid_namespace,
"disallow_sharing_agent_pid_namespace",
"If set to `true`, each top-level container will have its own pid\n"
"namespace, and if the framework requests to share the agent pid\n"
"namespace for the top level container, the container launch will be\n"
"rejected. If set to `false`, the top-level containers will share the\n"
"pid namespace with agent if the framework requests it. This flag will\n"
"be ignored if the `namespaces/pid` isolator is not enabled.\n",
false);
#endif
// Registers `agent_features` together with a validator for the parsed value.
//
// The validator performs its checks only when the flag is set. It rejects
// the value unless MULTI_ROLE, HIERARCHICAL_ROLE, RESERVATION_REFINEMENT and
// AGENT_OPERATION_FEEDBACK are all present, and it additionally requires
// RESOURCE_PROVIDER whenever RESIZE_VOLUME or AGENT_OPERATION_FEEDBACK is
// present. Since AGENT_OPERATION_FEEDBACK is mandatory, RESOURCE_PROVIDER is
// effectively mandatory as well; the help text and its example therefore
// list it, so that the documented example passes this validator.
add(&Flags::agent_features,
    "agent_features",
    "JSON representation of agent features to whitelist. We always require\n"
    "'MULTI_ROLE', 'HIERARCHICAL_ROLE', 'RESERVATION_REFINEMENT',\n"
    "'AGENT_OPERATION_FEEDBACK', and 'RESOURCE_PROVIDER' (which\n"
    "'AGENT_OPERATION_FEEDBACK' depends on).\n"
    "\n"
    "Example:\n"
    "{\n"
    " \"capabilities\": [\n"
    " {\"type\": \"MULTI_ROLE\"},\n"
    " {\"type\": \"HIERARCHICAL_ROLE\"},\n"
    " {\"type\": \"RESERVATION_REFINEMENT\"},\n"
    " {\"type\": \"AGENT_OPERATION_FEEDBACK\"},\n"
    " {\"type\": \"RESOURCE_PROVIDER\"}\n"
    " ]\n"
    "}\n",
    [](const Option<SlaveCapabilities>& agentFeatures) -> Option<Error> {
      // Nothing to validate when the flag was not provided.
      if (!agentFeatures.isSome()) {
        return None();
      }

      protobuf::slave::Capabilities capabilities(
          agentFeatures->capabilities());

      // Check all required capabilities are enabled.
      if (!capabilities.multiRole ||
          !capabilities.hierarchicalRole ||
          !capabilities.reservationRefinement ||
          !capabilities.agentOperationFeedback) {
        return Error(
            "At least the following agent features need to be enabled:"
            " MULTI_ROLE, HIERARCHICAL_ROLE, RESERVATION_REFINEMENT,"
            " AGENT_OPERATION_FEEDBACK");
      }

      // Dependency checks: both features below need RESOURCE_PROVIDER.
      if (capabilities.resizeVolume && !capabilities.resourceProvider) {
        return Error(
            "RESIZE_VOLUME feature requires RESOURCE_PROVIDER feature");
      }

      if (capabilities.agentOperationFeedback &&
          !capabilities.resourceProvider) {
        return Error(
            "AGENT_OPERATION_FEEDBACK feature"
            " requires RESOURCE_PROVIDER feature");
      }

      return None();
    });
// Registers `firewall_rules`, bound to `Flags::firewall_rules`. No default
// value is passed. The help text points at the `Firewall` message in
// `flags.proto` for the expected format.
add(&Flags::firewall_rules,
"firewall_rules",
"The value could be a JSON-formatted string of rules or a\n"
"file path containing the JSON-formatted rules used in the endpoints\n"
"firewall. Path must be of the form `file:///path/to/file`\n"
"or `/path/to/file`.\n"
"\n"
"See the `Firewall` message in `flags.proto` for the expected format.\n"
"\n"
"Example:\n"
"{\n"
" \"disabled_endpoints\": {\n"
" \"paths\": [\n"
" \"/files/browse\",\n"
" \"/metrics/snapshot\"\n"
" ]\n"
" }\n"
"}");
// Registers `credential`, bound to `Flags::credential`. No default value is
// passed.
//
// The standalone "\n" literal is the blank line that separates the
// description from the example, so the sentence before it must end with its
// own "\n" (the same layout the `firewall_rules` and `acls` help texts use).
add(&Flags::credential,
    "credential",
    "Path to a JSON-formatted file containing the credential\n"
    "to use to authenticate with the master.\n"
    "Path could be of the form `file:///path/to/file` or "
    "`/path/to/file`.\n"
    "\n"
    "Example:\n"
    "{\n"
    " \"principal\": \"username\",\n"
    " \"secret\": \"secret\"\n"
    "}");
// Registers `acls`, bound to `Flags::acls`. No default value is passed.
// Unlike the neighbouring flags, the help text is assembled at run time:
// `string(DEFAULT_AUTHORIZER)` is concatenated into the middle of it so the
// text names the default authorizer.
add(&Flags::acls,
"acls",
"The value could be a JSON-formatted string of ACLs\n"
"or a file path containing the JSON-formatted ACLs used\n"
"for authorization. Path could be of the form `file:///path/to/file`\n"
"or `/path/to/file`.\n"
"\n"
"Note that if the `--authorizer` flag is provided with a value\n"
"other than `" + string(DEFAULT_AUTHORIZER) + "`, the ACLs contents\n"
"will be ignored.\n"
"\n"
"See the ACLs protobuf in acls.proto for the expected format.\n"
"\n"
"Example:\n"
"{\n"
" \"get_endpoints\": [\n"
" {\n"
" \"principals\": { \"values\": [\"a\"] },\n"
" \"paths\": { \"values\": [\"/flags\", \"/monitor/statistics\"] }\n"
" }\n"
" ]\n"
"}");
// Registers `containerizers` with the default value "mesos". Per the help
// text the value is a comma-separated list and its order is the order in
// which the containerizers are tried.
add(&Flags::containerizers,
"containerizers",
"Comma-separated list of containerizer implementations\n"
"to compose in order to provide containerization.\n"
"Available options are `mesos` and `docker` (on Linux).\n"
"The order the containerizers are specified is the order\n"
"they are tried.\n",
"mesos");
// Docker containerizer flags.
// Registers `docker` with the default value "docker".
// NOTE(review): the help text asks for an absolute path while the default is
// a bare executable name; presumably it is resolved via PATH -- confirm.
add(&Flags::docker,
"docker",
"The absolute path to the docker executable for docker\n"
"containerizer.\n",
"docker");
// Registers `docker_remove_delay`. The default is the `DOCKER_REMOVE_DELAY`
// constant, whose value is defined outside this section.
add(&Flags::docker_remove_delay,
"docker_remove_delay",
"The amount of time to wait before removing docker containers \n"
"(i.e., `docker rm`) after Mesos regards the container as TERMINATED\n"
"(e.g., `3days`, `2weeks`, etc). This only applies for the Docker\n"
"Containerizer.\n",
DOCKER_REMOVE_DELAY);
// Registers `docker_kill_orphans` with a default of `true`. The help text
// recommends `false` when several agents share one OS.
add(&Flags::docker_kill_orphans,
"docker_kill_orphans",
"Enable docker containerizer to kill orphaned containers.\n"
"You should consider setting this to false when you launch multiple\n"
"agents in the same OS, to avoid one of the DockerContainerizer \n"
"removing docker tasks launched by other agents.\n",
true);
// Registers `docker_mesos_image`, bound to `Flags::docker_mesos_image`.
// No default value is passed in this call.
add(&Flags::docker_mesos_image,
"docker_mesos_image",
"The Docker image used to launch this Mesos agent instance.\n"
"If an image is specified, the docker containerizer assumes the agent\n"
"is running in a docker container, and launches executors with\n"
"docker containers in order to recover them when the agent restarts and\n"
"recovers.\n");
// Registers `docker_socket`. The default is the
// `DEFAULT_DOCKER_HOST_RESOURCE` constant, whose value is defined outside
// this section.
add(&Flags::docker_socket,
"docker_socket",
"Resource used by the agent and the executor to provide CLI access\n"
"to the Docker daemon. On Unix, this is typically a path to a\n"
"socket, such as '/var/run/docker.sock'. On Windows this must be a\n"
"named pipe, such as '//./pipe/docker_engine'. NOTE: This must be\n"
"the path used by the Docker image used to run the agent.\n",
DEFAULT_DOCKER_HOST_RESOURCE);
// Registers `docker_config`, bound to `Flags::docker_config`. No default
// value is passed. Per the help text the value is either an absolute path
// to a docker config file or the JSON itself.
add(&Flags::docker_config,
"docker_config",
"The default docker config file for agent. Can be provided either as an\n"
"absolute path pointing to the agent local docker config file, or as a\n"
"JSON-formatted string. The format of the docker config file should be\n"
"identical to docker's default one (e.g., either\n"
"`$HOME/.docker/config.json` or `$HOME/.dockercfg`).\n"
"Example JSON (`$HOME/.docker/config.json`):\n"
"{\n"
" \"auths\": {\n"
" \"https://index.docker.io/v1/\": {\n"
" \"auth\": \"xXxXxXxXxXx=\",\n"
" \"email\": \"username@example.com\"\n"
" }\n"
" }\n"
"}");
// Registers `sandbox_directory`. The default is picked at compile time by
// the preprocessor: "/mnt/mesos/sandbox" when `__WINDOWS__` is not defined,
// and "C:\\mesos\\sandbox" (as written in the source) when it is.
add(&Flags::sandbox_directory,
"sandbox_directory",
"The absolute path for the directory in the container where the\n"
"sandbox is mapped to.\n",
#ifndef __WINDOWS__
"/mnt/mesos/sandbox"
#else
"C:\\mesos\\sandbox"
#endif // __WINDOWS__
);
add(&Flags::default_container_dns,
"default_container_dns",
"JSON-formatted DNS information for CNI networks (Mesos containerizer)\n"
"and CNM networks (Docker containerizer). For CNI networks, this flag\n"
"can be used to configure `nameservers`, `domain`, `search` and\n"
"`options`, and its priority is lower than the DNS information returned\n"
"by a CNI plugin, but higher than the DNS information in agent host's\n"
"/etc/resolv.conf. For CNM networks, this flag can be used to configure\n"
"`nameservers`, `search` and `options`, it will only be used if there\n"
"is no DNS information provided in the ContainerInfo.docker.parameters\n"
"message.\n"
"\n"
"See the ContainerDNS message in `flags.proto` for the expected format.\n"
"\n"
"Example:\n"
"{\n"
" \"mesos\": [\n"
" {\n"
" \"network_mode\": \"CNI\",\n"
" \"network_name\": \"net1\",\n"
" \"dns\": {\n"
" \"nameservers\": [ \"8.8.8.8\", \"8.8.4.4\" ]\n"
" }\n"
" }\n"
" ],\n"
" \"docker\": [\n"
" {\n"
" \"network_mode\": \"BRIDGE\",\n"
" \"dns\": {\n"
" \"nameservers\": [ \"8.8.8.8\", \"8.8.4.4\" ]\n"
" }\n"
" },\n"
" {\n"
" \"network_mode\": \"USER\",\n"
" \"network_name\": \"net2\",\n"
" \"dns\": {\n"
" \"nameservers\": [ \"8.8.8.8\", \"8.8.4.4\" ]\n"
" }\n"
" }\n"
" ]\n"
"}",
[](const Option<ContainerDNSInfo>& defaultContainerDNS) -> Option<Error> {
if (defaultContainerDNS.isSome()) {
Option<ContainerDNSInfo::MesosInfo> defaultCniDNS;
hashmap<string, ContainerDNSInfo::MesosInfo> cniNetworkDNS;
Option<ContainerDNSInfo::DockerInfo> dockerBridgeDNS;