Avro入门

时间:2011-03-29 23:48:26

标签: mapreduce avro

我想开始使用Avro和MapReduce。有人能推荐一个好的入门教程或示例吗?我在网上搜索过,但没有找到多少资料。

4 个答案:

答案 0 :(得分:17)

我最近做了一个重度依赖Avro数据的项目。由于之前从未用过这种数据格式,我不得不从零学起。你说得对,刚开始使用Avro时确实很难从网上找到有帮助的资料。我推荐的材料如下:

  • 到目前为止,我找到的最有用的资料是Tom White所著《Hadoop: The Definitive Guide》一书中的Avro部分(第103–116页),以及存放该书示例代码的Github page
  • 有关其他代码示例,我查看了Ron Bodkin的Github页面avro-mr-sample
  • 就我而言,我使用Python来读写Avro文件,为此我参考了这个tutorial
  • 虽然显而易见,我还是要附上Avro Users mailing list的链接。那里可以找到大量信息;在读完上述材料并实现了不少代码之后,我发现自己花了好几个小时翻阅它的存档。

最后一条建议:请使用 Avro 1.4.1 搭配 Hadoop 0.20.2,并且只使用这一组合。我在用Hadoop 0.21和更新版本的Avro运行代码时遇到过一些严重问题。

答案 1 :(得分:2)

其他链接:

我认为现有文档(本来就为数不多)的主要问题在于,它们把重点放在相当繁琐的“通用(generic)”方式上;这显得很奇怪,因为这种方式集两种做法的缺点于一身——你仍然必须提供完整的数据模式,却无法享受静态类型等带来的好处。自动代码生成更方便,但文档对它着墨不多。

答案 2 :(得分:2)

https://github.com/apache/avro/blob/trunk/lang/java/mapred Avro的源代码中确实带有示例。例如,TestReflectJob帮助我使用预定义的领域对象编写了MapReduce作业。

答案 3 :(得分:0)

javascript + jquery:https://codepen.io/GentjanLikaj/pen/dyPXLXX

// Click handler for #btn1: splits the text in #text on the separator in #sep
// and renders one table row per column name (name, type <select>, nullable
// checkbox) inside #table.
$( "#btn1" ).click(function() {
	// Escapes a column name so it is shown literally instead of being parsed
	// as markup when the row HTML is appended.
	function escapeHtml(value) {
		return String(value)
			.replace(/&/g, '&amp;')
			.replace(/</g, '&lt;')
			.replace(/>/g, '&gt;')
			.replace(/"/g, '&quot;')
			.replace(/'/g, '&#39;');
	}

	// Drop any previously generated table before building a new one.
	$( "table" ).remove();

	// Line breaks are stripped so a multi-line paste is treated as one list.
	var text1 = document.getElementById('text').value.replace(/(\r\n|\n|\r)/gm,"");
	var sep = document.getElementById('sep').value;
	var aray = text1.split(sep);

	var formato = '<select class="form"><option value="string">string</option><option value="int">int</option><option value="date">date</option><option value="datetime">datetime</option><option value="float">float</option><option value="varchar(50)">varchar</option></select>';
	var null1 = '<input id="null" class="null11" type="checkbox" name="univoco" value="null" checked> <br>';
	var html = '<table id="tbl" class="table table-sm  table-striped table-centered .thead-dark "><thead class="thead-dark"><tr class="table-primary"><th scope="col">ColumnName</th><th scope="col">Format</th>  <th scope="col">Null</th>   </tr>  </thead> <tbody>';

	// The per-row ids/classes are kept as they were: the #btn2/#btn3 handlers
	// locate the controls of each row through 'select.form' and '#null'.
	$.each(aray, function(i, val) {
		html += '<tr><td id = "val">' + escapeHtml(val) + '</td><td id = "form">' + formato + '</td><td id = "nul">' + null1 + '</td></tr>';
	});
	html += '</tbody></table>';
	$("#table").append(html);
});

// Click handler for #btn2: reads every row of #tbl (column name, selected
// type, nullable checkbox) and renders an Avro record schema into #avro.
$( "#btn2").click(function() {
	// Escapes text so the generated JSON is displayed literally instead of
	// being parsed as markup by .append().
	function escapeHtml(value) {
		return String(value)
			.replace(/&/g, '&amp;')
			.replace(/</g, '&lt;')
			.replace(/>/g, '&gt;')
			.replace(/"/g, '&quot;');
	}

	// Maps the option chosen in the table to an Avro type. Avro has no
	// "datetime" or "varchar" primitive, so those are expressed as a
	// timestamp-millis logical type and a plain string. A nullable column
	// becomes a union with "null" for every type, dates included.
	function toAvroType(selected, nullable) {
		var type;
		if (selected === 'date') {
			type = {type:"int", logicalType:"date"};
		} else if (selected === 'datetime') {
			type = {type:"long", logicalType:"timestamp-millis"};
		} else if (selected === 'varchar(50)') {
			type = 'string';
		} else {
			type = selected;
		}
		return nullable ? [type, 'null'] : type;
	}

	$( "#avro" ).empty();
	var myRows = {myRows:[]};
	$('#tbl tbody tr').each(function(i, tr){
		var $row = $(tr);
		var selected = $row.find('select.form').children("option:selected").eq(0).val();
		var nullable = $row.find('#null').eq(0).is(':checked');
		var obj = {};
		obj['name'] = $row.find('td').eq(0).text();
		obj['type'] = toAvroType(selected, nullable);
		myRows.myRows.push(obj);
	});
	console.log(JSON.stringify(myRows));

	var header = '{ <br> &nbsp &nbsp "type": "record"  ,  <br>   &nbsp &nbsp "namespace": "Mezzora" ,  <br>  &nbsp &nbsp "name": "ReportDSL" ,  <br>   &nbsp &nbsp  "fields": [ <br>';
	$('#avro').append(header);

	// One field per line; every field after the first is prefixed with the
	// separating comma. With no rows the fields array is simply left empty.
	var i;
	var prefix;
	for (i = 0; i < myRows.myRows.length; i++) {
		prefix = (i === 0) ? '&nbsp &nbsp &nbsp &nbsp &nbsp ' : '&nbsp &nbsp &nbsp &nbsp &nbsp  ,';
		$('#avro').append(prefix + escapeHtml(JSON.stringify(myRows.myRows[i])) + "<br>");
	}

	var footer = '&nbsp &nbsp &nbsp &nbsp  ] <br> }';
	$('#avro').append(footer);
});

// Click handler for #btn3: reads every row of #tbl (column name, selected
// type, nullable checkbox) and renders a CREATE TABLE statement into #sql.
$( "#btn3").click(function() {
	// Escapes text so column names are displayed literally instead of being
	// parsed as markup by .append().
	function escapeHtml(value) {
		return String(value)
			.replace(/&/g, '&amp;')
			.replace(/</g, '&lt;')
			.replace(/>/g, '&gt;')
			.replace(/"/g, '&quot;');
	}

	$( "#sql" ).empty();
	var myRows = {myRows:[]};
	$('#tbl tbody tr').each(function(i, tr){
		var $row = $(tr);
		var type = $row.find('select.form').children("option:selected").eq(0).val();
		var nullable = $row.find('#null').eq(0).is(':checked');
		// "string" is not a SQL column type; emit varchar(50) instead. This
		// must happen before the nullability suffix is appended.
		if (type === 'string') {
			type = 'varchar(50)';
		}
		type += nullable ? ' null' : ' not null';
		myRows.myRows.push({
			name: $row.find('td').eq(0).text(),
			type: type
		});
	});
	console.log(JSON.stringify(myRows));

	var header = 'CREATE TABLE [schema].[tblName] &nbsp ( <br>';
	$('#sql').append(header);

	// One column definition per line; the comma is omitted after the last
	// column so the generated statement is valid SQL.
	var i;
	var separator;
	for (i = 0; i < myRows.myRows.length; i++) {
		separator = (i < myRows.myRows.length - 1) ? ',' : '';
		$('#sql').append('&nbsp &nbsp &nbsp &nbsp &nbsp ' + escapeHtml(myRows.myRows[i].name) + ' ' + escapeHtml(myRows.myRows[i].type) + separator + ' <br>');
	}

	var footer = ');';
	$('#sql').append(footer);
});
/* Page layout: a grid with 10 equal explicit columns and 12 equal explicit
   rows, 20px between columns and no gap between rows. */
.parent {
	display: grid;
	grid-template-rows: repeat(12, 1fr);
	grid-template-columns: repeat(10, 1fr);
	grid-row-gap: 0px;
	grid-column-gap: 20px;
}

/* Placement of each cell, written as row-start / row-end and
   column-start / column-end grid lines. */
.div1 {
	grid-row: 1 / 2;
	grid-column: 2 / 11;
}

.div2 {
	grid-row: 5 / 6;
	grid-column: 6 / 7;
}

/* Column line 12 lies past the 10 explicit columns (lines 1-11), so .div3 and
   .div7 extend into an implicit extra column. */
.div3 {
	grid-row: 3 / 5;
	grid-column: 1 / 12;
}

.div4 {
	grid-row: 2 / 3;
	grid-column: 1 / 2;
}

.div5 {
	grid-row: 6 / 13;
	grid-column: 1 / 4;
}

.div6 {
	grid-row: 6 / 13;
	grid-column: 4 / 8;
}

.div7 {
	grid-row: 6 / 13;
	grid-column: 8 / 12;
}

/* The two output panels share a thin border and a right margin. */
.div6,
.div7 {
	margin-right: 10px;
	border: 1px solid black;
}

/* Space below the "Convert to Table" button. */
#btn1 {
	margin-bottom: 30px;
}
<!DOCTYPE html>
<html lang="en">

<head>
	<meta charset="UTF-8">
	<meta name="viewport" content="width=device-width, initial-scale=1.0">
	<meta http-equiv="X-UA-Compatible" content="ie=edge">
	<title>Column list to Avro schema / SQL CREATE TABLE</title>
  <script  src="https://code.jquery.com/jquery-3.4.1.js"  integrity="sha256-WpOohJOqMqqyKL9FccASB9O0KwACQJpFTUBLTYOVvVU="	crossorigin="anonymous"> </script>
  <link href="https://stackpath.bootstrapcdn.com/bootstrap/4.4.1/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-Vkoo8x4CGsO3+Hhxv8T/Q5PaXtkKtu6ug5TOeNV6gBiFeWPGFN9MuhOf23Q9Ifjh" crossorigin="anonymous">
	<!-- NOTE(review): no <script> tag for the #btn1/#btn2/#btn3 click handlers
	     is visible in this document; confirm how that script is loaded. -->
</head>
<body>
	<h2>Insert the text and the separator and press the button: </h2>
	<!-- .parent is the layout grid; .div1 to .div7 are its cells. -->
	<div class="parent">
		<div class="div1"></div>
		<div class="div2 btn btn-alert"></div>
		<!-- Input area: the separator-delimited list of column names. -->
		<div class="div3">
      <textarea name="text" id="text" cols="120" rows="4" aria-label="Column names" placeholder="ColumnName1,ColumnName2,ColumnName3,ColumnName4,............"></textarea>
      <input type="button"	value="Convert to Table" id="btn1" class="btn btn-primary"> 
    </div>
		<!-- Separator used to split the column list. -->
		<div class="div4"><label for="sep">Separator</label><input type="text" name="sep" id="sep" value=","> </div>
		<!-- Conversion buttons and the container the generated table is appended to. -->
		<div class="div5">
			<input type="button" value="Convert to Avro" class="btn btn-success" id="btn2">
			<input type="button" value="Convert to SQL Create" class="btn btn-info" id="btn3">
			<div id="table"></div>
		</div>
		<!-- Output panel for the generated Avro schema. -->
		<div class="div6 card">
			<div class="card-header font-weight-bolder">Avro Format</div>
			<div class="card-body" id='avro'>
			</div>
		</div>
		<!-- Output panel for the generated SQL statement. -->
		<div class="div7 card">
			<div class="card-header font-weight-bolder">SQL Create</div>
			<div class="card-body" id='sql'>
			</div>
		</div>
	</div>
</body>
</html>