Apache Tez

  • Última modificación: 18 de mayo de 2022

[1]:
!hdfs dfs -mkdir input
!hdfs dfs -copyFromLocal /opt/hadoop/etc/hadoop/*.xml input
!hdfs dfs -ls input/
Found 9 items
-rw-r--r--   1 root supergroup       8814 2022-05-26 03:12 input/capacity-scheduler.xml
-rw-r--r--   1 root supergroup        240 2022-05-26 03:12 input/core-site.xml
-rw-r--r--   1 root supergroup      10206 2022-05-26 03:12 input/hadoop-policy.xml
-rw-r--r--   1 root supergroup        344 2022-05-26 03:12 input/hdfs-site.xml
-rw-r--r--   1 root supergroup        620 2022-05-26 03:12 input/httpfs-site.xml
-rw-r--r--   1 root supergroup       3518 2022-05-26 03:12 input/kms-acls.xml
-rw-r--r--   1 root supergroup       5939 2022-05-26 03:12 input/kms-site.xml
-rw-r--r--   1 root supergroup        776 2022-05-26 03:12 input/mapred-site.xml
-rw-r--r--   1 root supergroup        521 2022-05-26 03:12 input/yarn-site.xml
[2]:
!hadoop jar /opt/tez/tez-examples-0.7.1.jar orderedwordcount input output
[3]:
!hdfs dfs -ls output/
Found 2 items
-rw-r--r--   1 root supergroup          0 2022-05-26 03:12 output/_SUCCESS
-rw-r--r--   1 root supergroup      12869 2022-05-26 03:12 output/part-v002-o000-r-00000
[4]:
!hdfs dfs -cat output/part-v002-o000-r-00000
place   1
(default)       1
percentage      1
percent 1
ones,   1
ones.   1
(default),      1
only.   1
pending 1
(or     1
opportunities,  1
opportunities.  1
password.       1
over    1
overridden      1
(root   1
overrides       1
ownership.      1
parent  1
particular      1
(specified      1
within  1
window, 1
-1      1
0.0     1
1-MAX_INT.      1
1.      1
1.0.    1
window  1
40+20=60        1
while   1
well    1
via     1
version="1.0"?>      1
version 1
v2      1
<description>Default      1
<name>default.key.acl.DECRYPT_EEK</name>    1
<name>default.key.acl.GENERATE_EEK</name>   1
<name>default.key.acl.MANAGEMENT</name>     1
<name>default.key.acl.READ</name>   1
<name>dfs.replication</name>        1
<name>fs.defaultFS</name>   1
<name>hadoop.kms.acl.CREATE</name>  1
<name>hadoop.kms.acl.DECRYPT_EEK</name>     1
<name>hadoop.kms.acl.DELETE</name>  1
<name>hadoop.kms.acl.GENERATE_EEK</name>    1
<name>hadoop.kms.acl.GET</name>     1
<name>hadoop.kms.acl.GET_KEYS</name>        1
<name>hadoop.kms.acl.GET_METADATA</name>    1
<name>hadoop.kms.acl.ROLLOVER</name>        1
<name>hadoop.kms.acl.SET_KEY_MATERIAL</name>        1
<name>hadoop.kms.audit.aggregation.window.ms</name> 1
<name>hadoop.kms.audit.logger</name>        1
<name>hadoop.kms.authentication.kerberos.keytab</name>      1
<name>hadoop.kms.authentication.kerberos.name.rules</name>  1
<name>hadoop.kms.authentication.kerberos.principal</name>   1
<name>hadoop.kms.authentication.signer.secret.provider.zookeeper.auth.type</name>   1
<name>hadoop.kms.authentication.signer.secret.provider.zookeeper.connection.string</name>   1
<name>hadoop.kms.authentication.signer.secret.provider.zookeeper.kerberos.keytab</name>     1
<name>hadoop.kms.authentication.signer.secret.provider.zookeeper.kerberos.principal</name>  1
<name>hadoop.kms.authentication.signer.secret.provider.zookeeper.path</name>        1
<name>hadoop.kms.authentication.signer.secret.provider</name>       1
<name>hadoop.kms.authentication.type</name> 1
<name>hadoop.kms.cache.enable</name>        1
<name>hadoop.kms.cache.timeout.ms</name>    1
<name>hadoop.kms.current.key.cache.timeout.ms</name>        1
<name>hadoop.kms.key.provider.uri</name>    1
<name>hadoop.security.keystore.java-keystore-provider.password-file</name>  1
<name>mapreduce.application.classpath</name>        1
<name>mapreduce.framework.name</name>       1
<name>mapreduce.map.log.level</name>        1
<name>mapreduce.reduce.log.level</name>     1
<name>security.admin.operations.protocol.acl</name> 1
<name>security.applicationclient.protocol.acl</name>        1
<name>security.applicationhistory.protocol.acl</name>       1
<name>security.applicationmaster.protocol.acl</name>        1
<name>security.client.datanode.protocol.acl</name>  1
<name>security.client.protocol.acl</name>   1
<name>security.collector-nodemanager.protocol.acl</name>    1
<name>security.containermanagement.protocol.acl</name>      1
<name>security.datanode.protocol.acl</name> 1
<name>security.ha.service.protocol.acl</name>       1
<name>security.inter.datanode.protocol.acl</name>   1
<name>security.job.client.protocol.acl</name>       1
<name>security.job.task.protocol.acl</name> 1
<name>security.mrhs.client.protocol.acl</name>      1
<name>security.namenode.protocol.acl</name> 1
<name>security.qjournal.service.protocol.acl</name> 1
<name>security.refresh.policy.protocol.acl</name>   1
<name>security.refresh.user.mappings.protocol.acl</name>    1
<name>security.resourcelocalizer.protocol.acl</name>        1
<name>security.resourcemanager-administration.protocol.acl</name>   1
<name>security.resourcetracker.protocol.acl</name>  1
<name>security.zkfc.protocol.acl</name>     1
<name>yarn.app.mapreduce.am.log.level</name>        1
<name>yarn.nodemanager.aux-services</name>  1
<name>yarn.nodemanager.resource.detect-hardware-capabilities</name> 1
<name>yarn.nodemanager.resource.pcores-vcores-multiplier</name>     1
<name>yarn.nodemanager.vmem-check-enabled</name>    1
<name>yarn.scheduler.capacity.maximum-am-resource-percent</name>    1
<name>yarn.scheduler.capacity.maximum-applications</name>   1
<name>yarn.scheduler.capacity.node-locality-delay</name>    1
<name>yarn.scheduler.capacity.per-node-heartbeat.maximum-offswitch-assignments</name>       1
<name>yarn.scheduler.capacity.queue-mappings-override.enable</name> 1
<name>yarn.scheduler.capacity.queue-mappings</name> 1
<name>yarn.scheduler.capacity.rack-locality-additional-delay</name> 1
<name>yarn.scheduler.capacity.resource-calculator</name>    1
<name>yarn.scheduler.capacity.root.default.acl_administer_queue</name>      1
<name>yarn.scheduler.capacity.root.default.acl_application_max_priority</name>      1
<name>yarn.scheduler.capacity.root.default.acl_submit_applications</name>   1
<name>yarn.scheduler.capacity.root.default.capacity</name>  1
<name>yarn.scheduler.capacity.root.default.default-application-lifetime   1
<name>yarn.scheduler.capacity.root.default.maximum-application-lifetime   1
<name>yarn.scheduler.capacity.root.default.maximum-capacity</name>  1
<name>yarn.scheduler.capacity.root.default.state</name>     1
<name>yarn.scheduler.capacity.root.default.user-limit-factor</name> 1
<name>yarn.scheduler.capacity.root.queues</name>    1
<name>yarn.scheduler.capacity.workflow-priority-mappings-override.enable</name>     1
<name>yarn.scheduler.capacity.workflow-priority-mappings</name>     1
<value>#HOSTNAME#:#PORT#,...</value>        1
<value>${user.home}/kms.keytab</value>      1
workflowId      1
<value>/etc/hadoop/conf/kms.keytab</value>  1
<value>/hadoop-kms/hadoop-auth-signature-secret</value>     1
<value>/opt/hadoop/share/hadoop/mapreduce/*:/opt/hadoop/share/hadoop/mapreduce/lib/*</value>        1
<value>0.1</value>  1
work    1
<value>30000</value>        1
<value>40</value>   1
<value>5</value>    1
<value>600000</value>       1
<value>DEFAULT</value>      1
without 1
<value>HTTP/localhost</value>       1
<value>RUNNING</value>      1
<value>default</value>      1
"clumping"      1
<value>hdfs://0.0.0.0:9000</value>  1
<value>jceks://file@/${user.home}/kms.keystore</value>      1
<value>kms.keystore.password</value>        1
<value>kms/#HOSTNAME#</value>       1
<value>mapreduce_shuffle</value>    1
<value>none</value> 1
<value>org.apache.hadoop.crypto.key.kms.server.SimpleKMSAuditLogger</value> 1
<value>org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator</value>       1
<value>random</value>       1
<value>simple</value>       1
<value>yarn-tez</value>     1
updating        1
unique  1
ACLs    1
u:%user:%user   1
ASF     1
AdminOperationsProtocol.        1
type,   1
ApplicationClientProtocol,      1
ApplicationHistoryProtocol,     1
ApplicationMaster       1
ApplicationMasterProtocol,      1
Audit   1
type    1
Backend 1
But     1
traffic.        1
CPU     1
CREATE  1
Cache   1
Cached  1
Can     1
ClientDatanodeProtocol, 1
ClientProtocol, 1
CollectorNodemanagerProtocol,   1
Complementary   1
Configuration   1
Configuring     1
ContainerManagementProtocol     1
Controller      1
Controls        1
DECRYPT_EEK     1
DatanodeProtocol,       1
too     1
DefaultResourceCalculator       1
DistributedFileSystem.  1
DominantResourceCalculator      1
Duplicate       1
Example:        1
Failover        1
to)     1
Foundation      1
GENERATE_EEK    1
HAAdmin 1
HAService       1
HSClientProtocol,       1
History 1
timestamp.      1
In      1
Increasing      1
Indicates       1
InterDatanodeProtocol,  1
"full_queue_name"       1
JNs     1
JavaKeyStoreProvider,   1
text-format     1
tasktracker.    1
"priority".     1
KMSAuditLogger  1
tasks   1
LICENSE 1
Legal   1
target  1
taken.  1
Lower   1
MANAGEMENT      1
MRClientProtocol,       1
"workflowId"    1
Memory  1
Memory, 1
NN      1
NOTICE  1
NamenodeProtocol,       1
&quot;kerberos&quot;.   1
such    1
Options 1
Path    1
Protocols       1
Put     1
QJournalProtocol,       1
QuorumJournalManager    1
READ    1
ROLLOVER        1
RUNNING 1
RefreshAuthorizationPolicyProtocol,     1
RefreshUserMappingsProtocol.    1
ResourceCalculator      1
&quot;simple&quot;      1
ResourceManagerAdministrationProtocol,  1
ResourceTrackerProtocol,        1
Resources       1
Rules   1
SPNEGO  1
STOPPED.        1
Security        1
string, 1
Server  1
SimpleKMSAuditLogger    1
Software        1
State   1
TaskUmbilicalProtocol,  1
stored. 1
store   1
Typically       1
URI     1
User    1
states  1
state   1
start   1
'HTTP/' 1
Whether 1
YARN    1
stand-by        1
ZK      1
ZNode   1
'none'  1
[user={name}    1
[u|g]:[name]:[queue_name][,next 1
[workflowId]:[full_queue_name]:[priority][,next 1
specify 1
absolute        1
access  1
accompanying    1
specification.  1
act     1
active  1
administer      1
aggregation     1
sooner. 1
agreements.     1
sometimes       1
allow   1
allowed 1
site-specific   1
along   1
also    1
single  1
any     1
sign    1
setup   1
applications.   1
approximately   1
server  1
separated.      1
assign  1
assigning       1
'random'        1
security        1
attempt 1
'sasl'  1
'string'        1
based   1
basis   1
block   1
secondary       1
cached  1
calculated      1
scheduler.      1
can't       1
capacity        1
capacity.</description>   1
case,   1
changes 1
class   1
client-to-datanode      1
cluster.        1
code    1
collector       1
comma   1
scheduler       1
commands        1
same    1
concurrent      1
config  1
running.        1
configured,     1
connection      1
consulting      1
containers,     1
context)        1
context.        1
contributor     1
controls        1
cookies 1
running 1
copyright       1
count   1
create-key      1
creating        1
current 1
data    1
datanodes       1
decryptEncryptedKey     1
run     1
default_priority={priority}]    1
root    1
delete-key      1
deleted 1
dfsadmin        1
disables        1
rollover-key    1
dominant-resource       1
during  1
e.g,    1
rolling 1
edit    1
return  1
retrieve        1
end     1
endpoint.       1
'zookeeper'.    1
events. 1
example,        1
exceed  1
result  1
feature.        1
resource        1
flushed 1
resolve 1
from    1
from.   1
generateEncryptedKey    1
generation      1
generic 1
get-current-key 1
get-key-metadata        1
get-key-version 1
get-keys        1
get-keys-metadata       1
getKeyVersion   1
getKeyVersion,  1
getMetadata,    1
getMetadata.    1
give    1
given   1
request,        1
regarding       1
group={name}    1
hard    1
has     1
heartbeat.      1
history 1
hostnames       1
hot-reloaded    1
how     1
refreshable.    1
implementation  1
recovery.       1
improve 1
rate    1
in-effect.      1
information     1
instances       1
instances,      1
instead 1
inter-datanode  1
rack.   1
rack-locality-delay=20, 1
queues, 1
queue). 1
query   1
key.    1
keys    1
keystore        1
killed  1
killing 1
quashed 1
provide 1
level   1
license 1
licenses        1
(ASF)   1
like    1
property        1
locations       1
logger  1
logs.</description>       1
low     1
manage  1
mapped  1
mappings.       1
maps    1
masters 1
(Kerberos).     1
max_priority={priority} 1
(as     1
printed 1
principal.      1
message 1
metadata        1
modified.       1
more    1
mradmin 1
ms)     1
multi-dimensional       1
multiple        1
must    1
namenode        1
namenode.</description>   1
positive        1
node's      1
node-locality-delay     1
node-locality-delay=40  1
nodes   1
nodes.  1
port    1
policy  1
value,  2
zero    2
uses    2
<value>10000</value>        2
<value>100</value>  2
'zookeeper'     2
different       2
disabled.       2
mapping]*       2
KeyProvider     2
<value></value>     2
KeyProvider.    2
account.        2
limit   2
mappings        2
priority.       2
taken   2
syntax  2
additional      2
admin   2
MR      2
commands.       2
administrators  2
affects 2
source  2
aggregated      2
communciate     2
<value>true</value> 2
schedule        2
compare 2
Note    2
ACL,    2
Number  2
refresh 2
OFF_SWITCH      2
submit  2
Any     2
i.e.    2
returned        2
configuration.  2
if      2
nodemanager     2
ApplicationMasters      2
connect 2
Authentication  2
signature       2
enabled,        2
considered      2
getCurrentKey   2
setting 2
containers      2
ResourceLocalizer       2
keytab  2
containers.     2
CapacityScheduler       2
log     2
equal   2
namenode.       2
events  2
cookie  2
point-in-time   2
attempts        2
audit   2
name    2
CryptoExtension 2
leaf    2
exceeds 2
less    2
per     2
secret  2
seconds.        2
response.       2
credentials     2
path    2
status  2
Expiry  2
Used    2
cache   2
cache,  2
resources       2
parameter,      2
GET     2
part    2
false.  2
We      2
feature 2
HTTP    2
milliseconds.   2
map     2
then    2
file.   2
user?   2
into    2
40      2
present,        2
:       2
used.   2
where   2
protocol,       2
</name>   2
Zookeeper.      2
client  2
mapping 2
clients 3
<value>-1</value>   3
<value>1</value>    3
<value>ERROR</value>        3
<value>false</value>        3
It      3
KMS.    3
Maximum 3
NodeManager     3
ResourceManager 3
When    3
Zookeeper       3
assignments     3
authentication  3
backing 3
cluster 3
etc.    3
lifetime.       3
material        3
maximum 3
off-switch      3
only    3
opportunities   3
override        3
priority        3
rack-local      3
reduce  3
scheduling      3
service 3
should  3
size    3
submission      3
time    3
timeline        3
user.   3
using   3
values  3
who     3
queues  4
job     4
explicitly      4
it      4
when    4
protocol        4
principal       4
Default 4
than    4
acls    4
configured      4
jobs    4
defined.        4
WITHOUT 5
limitations     5
law     5
language        5
implied.        5
http://www.apache.org/licenses/LICENSE-2.0      5
href="configuration.xsl"?>   5
governing       5
required        5
express 5
except  5
copy    5
compliance      5
2.0     5
software        5
agreed  5
after   5
specific        5
You     5
WARRANTIES      5
Version 5
Unless  5
(the    5
OR      5
submitted       5
OF      5
Licensed        5
License,        5
KIND,   5
"License");     5
IS" 5
CONDITIONS      5
BASIS,  5
type="text/xsl" 5
ANY     5
"AS 5
writing,        5
<?xml-stylesheet     5
one     5
permissions     5
obtain  5
missed  5
operations      6
either  6
applications    6
Apache  6
Kerberos        6
specified       6
lifetime        6
you     6
See     7
version="1.0"   7
at      7
other.  7
queue.  7
each    7
encoding="UTF-8"?>   7
number  7
applicable      7
queue   8
<?xml        8
an      8
on      8
use     8
that    9
file    9
<configuration>   9
This    9
which   9
</configuration>  9
operations.     9
If      9
can     10
key     10
may     10
License.        10
KMS     10
application     11
distributed     11
communicate     11
as      12
not     15
License 15
default 15
under   16
<!-- 16
-->  16
or      18
users,wheel".       19
"alice,bob  19
separated       19
allowed.</description>    19
"*"     19
blank.  19
will    19
special 19
means   19
e.g.    19
For     20
comma-separated 20
names.  21
this    21
be      22
<description>ACL  22
A       22
users   22
used    25
all     25
in      27
with    28
value   28
are     29
ACL     35
<value>*</value>    38
group   38
user    44
list    45
by      46
<description>     49
</description>    50
to      57
a       57
The     60
and     67
for     71
is      75
of      76
</property>       83
<property>        83
the     136