Apache Tez
Última modificación: 18 de mayo de 2022
[1]:
!hdfs dfs -mkdir input
!hdfs dfs -copyFromLocal /opt/hadoop/etc/hadoop/*.xml input
!hdfs dfs -ls input/
Found 9 items
-rw-r--r-- 1 root supergroup 8814 2022-05-26 03:12 input/capacity-scheduler.xml
-rw-r--r-- 1 root supergroup 240 2022-05-26 03:12 input/core-site.xml
-rw-r--r-- 1 root supergroup 10206 2022-05-26 03:12 input/hadoop-policy.xml
-rw-r--r-- 1 root supergroup 344 2022-05-26 03:12 input/hdfs-site.xml
-rw-r--r-- 1 root supergroup 620 2022-05-26 03:12 input/httpfs-site.xml
-rw-r--r-- 1 root supergroup 3518 2022-05-26 03:12 input/kms-acls.xml
-rw-r--r-- 1 root supergroup 5939 2022-05-26 03:12 input/kms-site.xml
-rw-r--r-- 1 root supergroup 776 2022-05-26 03:12 input/mapred-site.xml
-rw-r--r-- 1 root supergroup 521 2022-05-26 03:12 input/yarn-site.xml
[2]:
!hadoop jar /opt/tez/tez-examples-0.7.1.jar orderedwordcount input output
[3]:
!hdfs dfs -ls output/
Found 2 items
-rw-r--r-- 1 root supergroup 0 2022-05-26 03:12 output/_SUCCESS
-rw-r--r-- 1 root supergroup 12869 2022-05-26 03:12 output/part-v002-o000-r-00000
[4]:
!hdfs dfs -cat output/part-v002-o000-r-00000
place 1
(default) 1
percentage 1
percent 1
ones, 1
ones. 1
(default), 1
only. 1
pending 1
(or 1
opportunities, 1
opportunities. 1
password. 1
over 1
overridden 1
(root 1
overrides 1
ownership. 1
parent 1
particular 1
(specified 1
within 1
window, 1
-1 1
0.0 1
1-MAX_INT. 1
1. 1
1.0. 1
window 1
40+20=60 1
while 1
well 1
via 1
version="1.0"?> 1
version 1
v2 1
<description>Default 1
<name>default.key.acl.DECRYPT_EEK</name> 1
<name>default.key.acl.GENERATE_EEK</name> 1
<name>default.key.acl.MANAGEMENT</name> 1
<name>default.key.acl.READ</name> 1
<name>dfs.replication</name> 1
<name>fs.defaultFS</name> 1
<name>hadoop.kms.acl.CREATE</name> 1
<name>hadoop.kms.acl.DECRYPT_EEK</name> 1
<name>hadoop.kms.acl.DELETE</name> 1
<name>hadoop.kms.acl.GENERATE_EEK</name> 1
<name>hadoop.kms.acl.GET</name> 1
<name>hadoop.kms.acl.GET_KEYS</name> 1
<name>hadoop.kms.acl.GET_METADATA</name> 1
<name>hadoop.kms.acl.ROLLOVER</name> 1
<name>hadoop.kms.acl.SET_KEY_MATERIAL</name> 1
<name>hadoop.kms.audit.aggregation.window.ms</name> 1
<name>hadoop.kms.audit.logger</name> 1
<name>hadoop.kms.authentication.kerberos.keytab</name> 1
<name>hadoop.kms.authentication.kerberos.name.rules</name> 1
<name>hadoop.kms.authentication.kerberos.principal</name> 1
<name>hadoop.kms.authentication.signer.secret.provider.zookeeper.auth.type</name> 1
<name>hadoop.kms.authentication.signer.secret.provider.zookeeper.connection.string</name> 1
<name>hadoop.kms.authentication.signer.secret.provider.zookeeper.kerberos.keytab</name> 1
<name>hadoop.kms.authentication.signer.secret.provider.zookeeper.kerberos.principal</name> 1
<name>hadoop.kms.authentication.signer.secret.provider.zookeeper.path</name> 1
<name>hadoop.kms.authentication.signer.secret.provider</name> 1
<name>hadoop.kms.authentication.type</name> 1
<name>hadoop.kms.cache.enable</name> 1
<name>hadoop.kms.cache.timeout.ms</name> 1
<name>hadoop.kms.current.key.cache.timeout.ms</name> 1
<name>hadoop.kms.key.provider.uri</name> 1
<name>hadoop.security.keystore.java-keystore-provider.password-file</name> 1
<name>mapreduce.application.classpath</name> 1
<name>mapreduce.framework.name</name> 1
<name>mapreduce.map.log.level</name> 1
<name>mapreduce.reduce.log.level</name> 1
<name>security.admin.operations.protocol.acl</name> 1
<name>security.applicationclient.protocol.acl</name> 1
<name>security.applicationhistory.protocol.acl</name> 1
<name>security.applicationmaster.protocol.acl</name> 1
<name>security.client.datanode.protocol.acl</name> 1
<name>security.client.protocol.acl</name> 1
<name>security.collector-nodemanager.protocol.acl</name> 1
<name>security.containermanagement.protocol.acl</name> 1
<name>security.datanode.protocol.acl</name> 1
<name>security.ha.service.protocol.acl</name> 1
<name>security.inter.datanode.protocol.acl</name> 1
<name>security.job.client.protocol.acl</name> 1
<name>security.job.task.protocol.acl</name> 1
<name>security.mrhs.client.protocol.acl</name> 1
<name>security.namenode.protocol.acl</name> 1
<name>security.qjournal.service.protocol.acl</name> 1
<name>security.refresh.policy.protocol.acl</name> 1
<name>security.refresh.user.mappings.protocol.acl</name> 1
<name>security.resourcelocalizer.protocol.acl</name> 1
<name>security.resourcemanager-administration.protocol.acl</name> 1
<name>security.resourcetracker.protocol.acl</name> 1
<name>security.zkfc.protocol.acl</name> 1
<name>yarn.app.mapreduce.am.log.level</name> 1
<name>yarn.nodemanager.aux-services</name> 1
<name>yarn.nodemanager.resource.detect-hardware-capabilities</name> 1
<name>yarn.nodemanager.resource.pcores-vcores-multiplier</name> 1
<name>yarn.nodemanager.vmem-check-enabled</name> 1
<name>yarn.scheduler.capacity.maximum-am-resource-percent</name> 1
<name>yarn.scheduler.capacity.maximum-applications</name> 1
<name>yarn.scheduler.capacity.node-locality-delay</name> 1
<name>yarn.scheduler.capacity.per-node-heartbeat.maximum-offswitch-assignments</name> 1
<name>yarn.scheduler.capacity.queue-mappings-override.enable</name> 1
<name>yarn.scheduler.capacity.queue-mappings</name> 1
<name>yarn.scheduler.capacity.rack-locality-additional-delay</name> 1
<name>yarn.scheduler.capacity.resource-calculator</name> 1
<name>yarn.scheduler.capacity.root.default.acl_administer_queue</name> 1
<name>yarn.scheduler.capacity.root.default.acl_application_max_priority</name> 1
<name>yarn.scheduler.capacity.root.default.acl_submit_applications</name> 1
<name>yarn.scheduler.capacity.root.default.capacity</name> 1
<name>yarn.scheduler.capacity.root.default.default-application-lifetime 1
<name>yarn.scheduler.capacity.root.default.maximum-application-lifetime 1
<name>yarn.scheduler.capacity.root.default.maximum-capacity</name> 1
<name>yarn.scheduler.capacity.root.default.state</name> 1
<name>yarn.scheduler.capacity.root.default.user-limit-factor</name> 1
<name>yarn.scheduler.capacity.root.queues</name> 1
<name>yarn.scheduler.capacity.workflow-priority-mappings-override.enable</name> 1
<name>yarn.scheduler.capacity.workflow-priority-mappings</name> 1
<value>#HOSTNAME#:#PORT#,...</value> 1
<value>${user.home}/kms.keytab</value> 1
workflowId 1
<value>/etc/hadoop/conf/kms.keytab</value> 1
<value>/hadoop-kms/hadoop-auth-signature-secret</value> 1
<value>/opt/hadoop/share/hadoop/mapreduce/*:/opt/hadoop/share/hadoop/mapreduce/lib/*</value> 1
<value>0.1</value> 1
work 1
<value>30000</value> 1
<value>40</value> 1
<value>5</value> 1
<value>600000</value> 1
<value>DEFAULT</value> 1
without 1
<value>HTTP/localhost</value> 1
<value>RUNNING</value> 1
<value>default</value> 1
"clumping" 1
<value>hdfs://0.0.0.0:9000</value> 1
<value>jceks://file@/${user.home}/kms.keystore</value> 1
<value>kms.keystore.password</value> 1
<value>kms/#HOSTNAME#</value> 1
<value>mapreduce_shuffle</value> 1
<value>none</value> 1
<value>org.apache.hadoop.crypto.key.kms.server.SimpleKMSAuditLogger</value> 1
<value>org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator</value> 1
<value>random</value> 1
<value>simple</value> 1
<value>yarn-tez</value> 1
updating 1
unique 1
ACLs 1
u:%user:%user 1
ASF 1
AdminOperationsProtocol. 1
type, 1
ApplicationClientProtocol, 1
ApplicationHistoryProtocol, 1
ApplicationMaster 1
ApplicationMasterProtocol, 1
Audit 1
type 1
Backend 1
But 1
traffic. 1
CPU 1
CREATE 1
Cache 1
Cached 1
Can 1
ClientDatanodeProtocol, 1
ClientProtocol, 1
CollectorNodemanagerProtocol, 1
Complementary 1
Configuration 1
Configuring 1
ContainerManagementProtocol 1
Controller 1
Controls 1
DECRYPT_EEK 1
DatanodeProtocol, 1
too 1
DefaultResourceCalculator 1
DistributedFileSystem. 1
DominantResourceCalculator 1
Duplicate 1
Example: 1
Failover 1
to) 1
Foundation 1
GENERATE_EEK 1
HAAdmin 1
HAService 1
HSClientProtocol, 1
History 1
timestamp. 1
In 1
Increasing 1
Indicates 1
InterDatanodeProtocol, 1
"full_queue_name" 1
JNs 1
JavaKeyStoreProvider, 1
text-format 1
tasktracker. 1
"priority". 1
KMSAuditLogger 1
tasks 1
LICENSE 1
Legal 1
target 1
taken. 1
Lower 1
MANAGEMENT 1
MRClientProtocol, 1
"workflowId" 1
Memory 1
Memory, 1
NN 1
NOTICE 1
NamenodeProtocol, 1
"kerberos". 1
such 1
Options 1
Path 1
Protocols 1
Put 1
QJournalProtocol, 1
QuorumJournalManager 1
READ 1
ROLLOVER 1
RUNNING 1
RefreshAuthorizationPolicyProtocol, 1
RefreshUserMappingsProtocol. 1
ResourceCalculator 1
"simple" 1
ResourceManagerAdministrationProtocol, 1
ResourceTrackerProtocol, 1
Resources 1
Rules 1
SPNEGO 1
STOPPED. 1
Security 1
string, 1
Server 1
SimpleKMSAuditLogger 1
Software 1
State 1
TaskUmbilicalProtocol, 1
stored. 1
store 1
Typically 1
URI 1
User 1
states 1
state 1
start 1
'HTTP/' 1
Whether 1
YARN 1
stand-by 1
ZK 1
ZNode 1
'none' 1
[user={name} 1
[u|g]:[name]:[queue_name][,next 1
[workflowId]:[full_queue_name]:[priority][,next 1
specify 1
absolute 1
access 1
accompanying 1
specification. 1
act 1
active 1
administer 1
aggregation 1
sooner. 1
agreements. 1
sometimes 1
allow 1
allowed 1
site-specific 1
along 1
also 1
single 1
any 1
sign 1
setup 1
applications. 1
approximately 1
server 1
separated. 1
assign 1
assigning 1
'random' 1
security 1
attempt 1
'sasl' 1
'string' 1
based 1
basis 1
block 1
secondary 1
cached 1
calculated 1
scheduler. 1
can't 1
capacity 1
capacity.</description> 1
case, 1
changes 1
class 1
client-to-datanode 1
cluster. 1
code 1
collector 1
comma 1
scheduler 1
commands 1
same 1
concurrent 1
config 1
running. 1
configured, 1
connection 1
consulting 1
containers, 1
context) 1
context. 1
contributor 1
controls 1
cookies 1
running 1
copyright 1
count 1
create-key 1
creating 1
current 1
data 1
datanodes 1
decryptEncryptedKey 1
run 1
default_priority={priority}] 1
root 1
delete-key 1
deleted 1
dfsadmin 1
disables 1
rollover-key 1
dominant-resource 1
during 1
e.g, 1
rolling 1
edit 1
return 1
retrieve 1
end 1
endpoint. 1
'zookeeper'. 1
events. 1
example, 1
exceed 1
result 1
feature. 1
resource 1
flushed 1
resolve 1
from 1
from. 1
generateEncryptedKey 1
generation 1
generic 1
get-current-key 1
get-key-metadata 1
get-key-version 1
get-keys 1
get-keys-metadata 1
getKeyVersion 1
getKeyVersion, 1
getMetadata, 1
getMetadata. 1
give 1
given 1
request, 1
regarding 1
group={name} 1
hard 1
has 1
heartbeat. 1
history 1
hostnames 1
hot-reloaded 1
how 1
refreshable. 1
implementation 1
recovery. 1
improve 1
rate 1
in-effect. 1
information 1
instances 1
instances, 1
instead 1
inter-datanode 1
rack. 1
rack-locality-delay=20, 1
queues, 1
queue). 1
query 1
key. 1
keys 1
keystore 1
killed 1
killing 1
quashed 1
provide 1
level 1
license 1
licenses 1
(ASF) 1
like 1
property 1
locations 1
logger 1
logs.</description> 1
low 1
manage 1
mapped 1
mappings. 1
maps 1
masters 1
(Kerberos). 1
max_priority={priority} 1
(as 1
printed 1
principal. 1
message 1
metadata 1
modified. 1
more 1
mradmin 1
ms) 1
multi-dimensional 1
multiple 1
must 1
namenode 1
namenode.</description> 1
positive 1
node's 1
node-locality-delay 1
node-locality-delay=40 1
nodes 1
nodes. 1
port 1
policy 1
value, 2
zero 2
uses 2
<value>10000</value> 2
<value>100</value> 2
'zookeeper' 2
different 2
disabled. 2
mapping]* 2
KeyProvider 2
<value></value> 2
KeyProvider. 2
account. 2
limit 2
mappings 2
priority. 2
taken 2
syntax 2
additional 2
admin 2
MR 2
commands. 2
administrators 2
affects 2
source 2
aggregated 2
communciate 2
<value>true</value> 2
schedule 2
compare 2
Note 2
ACL, 2
Number 2
refresh 2
OFF_SWITCH 2
submit 2
Any 2
i.e. 2
returned 2
configuration. 2
if 2
nodemanager 2
ApplicationMasters 2
connect 2
Authentication 2
signature 2
enabled, 2
considered 2
getCurrentKey 2
setting 2
containers 2
ResourceLocalizer 2
keytab 2
containers. 2
CapacityScheduler 2
log 2
equal 2
namenode. 2
events 2
cookie 2
point-in-time 2
attempts 2
audit 2
name 2
CryptoExtension 2
leaf 2
exceeds 2
less 2
per 2
secret 2
seconds. 2
response. 2
credentials 2
path 2
status 2
Expiry 2
Used 2
cache 2
cache, 2
resources 2
parameter, 2
GET 2
part 2
false. 2
We 2
feature 2
HTTP 2
milliseconds. 2
map 2
then 2
file. 2
user? 2
into 2
40 2
present, 2
: 2
used. 2
where 2
protocol, 2
</name> 2
Zookeeper. 2
client 2
mapping 2
clients 3
<value>-1</value> 3
<value>1</value> 3
<value>ERROR</value> 3
<value>false</value> 3
It 3
KMS. 3
Maximum 3
NodeManager 3
ResourceManager 3
When 3
Zookeeper 3
assignments 3
authentication 3
backing 3
cluster 3
etc. 3
lifetime. 3
material 3
maximum 3
off-switch 3
only 3
opportunities 3
override 3
priority 3
rack-local 3
reduce 3
scheduling 3
service 3
should 3
size 3
submission 3
time 3
timeline 3
user. 3
using 3
values 3
who 3
queues 4
job 4
explicitly 4
it 4
when 4
protocol 4
principal 4
Default 4
than 4
acls 4
configured 4
jobs 4
defined. 4
WITHOUT 5
limitations 5
law 5
language 5
implied. 5
http://www.apache.org/licenses/LICENSE-2.0 5
href="configuration.xsl"?> 5
governing 5
required 5
express 5
except 5
copy 5
compliance 5
2.0 5
software 5
agreed 5
after 5
specific 5
You 5
WARRANTIES 5
Version 5
Unless 5
(the 5
OR 5
submitted 5
OF 5
Licensed 5
License, 5
KIND, 5
"License"); 5
IS" 5
CONDITIONS 5
BASIS, 5
type="text/xsl" 5
ANY 5
"AS 5
writing, 5
<?xml-stylesheet 5
one 5
permissions 5
obtain 5
missed 5
operations 6
either 6
applications 6
Apache 6
Kerberos 6
specified 6
lifetime 6
you 6
See 7
version="1.0" 7
at 7
other. 7
queue. 7
each 7
encoding="UTF-8"?> 7
number 7
applicable 7
queue 8
<?xml 8
an 8
on 8
use 8
that 9
file 9
<configuration> 9
This 9
which 9
</configuration> 9
operations. 9
If 9
can 10
key 10
may 10
License. 10
KMS 10
application 11
distributed 11
communicate 11
as 12
not 15
License 15
default 15
under 16
<!-- 16
--> 16
or 18
users,wheel". 19
"alice,bob 19
separated 19
allowed.</description> 19
"*" 19
blank. 19
will 19
special 19
means 19
e.g. 19
For 20
comma-separated 20
names. 21
this 21
be 22
<description>ACL 22
A 22
users 22
used 25
all 25
in 27
with 28
value 28
are 29
ACL 35
<value>*</value> 38
group 38
user 44
list 45
by 46
<description> 49
</description> 50
to 57
a 57
The 60
and 67
for 71
is 75
of 76
</property> 83
<property> 83
the 136