Have you ever tried to load CSV data into Teradata tables? I mean: for huge files with more than a million records we surely have to go with FastLoad, MultiLoad, etc. But what if we have a file with a small number of records and we don’t want to write FastLoad routines? Surely we’ve got the option of the Teradata SQL Assistant import feature, but – as many of those who have gone down this road will confirm – that’s not what we’d call a fast way!
That’s why I’ve decided to write a few lines of C# code to speed up the loading of “not so big” files.
You can find the source code here and you’ll need Visual Studio 2017 Community Edition or higher to compile an executable version.
Also you can have a look at the main “.cs” file’s content just below.
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Diagnostics;
using System.Drawing;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Windows.Forms;
using Teradata.Client.Provider;

namespace loadtable
{
    /// <summary>
    /// Simple CSV-to-Teradata loader intended for "not so big" files:
    /// the first CSV line must contain column names matching the target
    /// table, and rows are pushed through a batched TdDataAdapter update.
    /// </summary>
    public partial class Form1 : Form
    {
        // Field separator used to split the CSV header and data rows.
        private const char SEPARATOR = ';';

        // Number of rows buffered before a batched adapter update is pushed.
        private const int BATCH_SIZE = 50000;

        // Connection details collected from the login dialog at startup.
        private string host { get; set; }
        private string uid { get; set; }
        private string pwd { get; set; }

        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Builds the Teradata connection string from the credentials the
        /// login dialog supplied. Centralized here because three different
        /// handlers previously duplicated this builder setup verbatim.
        /// </summary>
        private string BuildConnectionString()
        {
            var builder = new TdConnectionStringBuilder
            {
                CommandTimeout = 300,
                ConnectionTimeout = 100,
                DataSource = host,
                UserId = uid,
                Password = pwd
            };
            return builder.ConnectionString;
        }

        /// <summary>
        /// Loads the CSV file named in txtFile into the table selected in the
        /// database/table combos. Rows whose field count differs from the
        /// header are skipped; per-field conversion failures become NULL and
        /// are counted as non-blocking errors.
        /// </summary>
        private void button1_Click(object sender, EventArgs e)
        {
            var sw = Stopwatch.StartNew();
            int counter = 0;
            int nonBlockingErrors = 0;

            this.Text = host;

            // using-blocks guarantee the connection, reader, command and
            // adapter are released even when an exception aborts the load.
            using (var mainConn = new TdConnection(BuildConnectionString()))
            using (var csvReader = new StreamReader(txtFile.Text))
            {
                mainConn.Open();

                // Header row drives the column mapping; quotes are stripped first.
                string line = csvReader.ReadLine();
                if (line == null)
                {
                    MessageBox.Show(this, "The input file is empty.");
                    return;
                }
                string[] header = line.Replace("\"", "").Split(SEPARATOR);

                using (var selectCmd = new TdCommand(
                    "SELECT * FROM " + cboDatabaseList.Text + "." + cboTables.Text,
                    mainConn))
                using (var adapter = new TdDataAdapter(selectCmd))
                using (var dt = new DataTable())
                {
                    selectCmd.CommandTimeout = 10000;
                    adapter.UpdateBatchSize = BATCH_SIZE;
                    adapter.KeepCommandBatchSequence = false;

                    // The command builder derives the INSERT used by adapter.Update().
                    var cb = new TdCommandBuilder(adapter);

                    // Fill() pulls in the table schema so new DataRows inherit
                    // the correct column types for the conversions below.
                    adapter.Fill(dt);

                    while ((line = csvReader.ReadLine()) != null)
                    {
                        string[] fields = line.Replace("\"", "").Split(SEPARATOR);

                        // Skip malformed rows whose field count differs from the header.
                        if (fields.Length == header.Length)
                        {
                            dt.Rows.Add(BuildRow(dt, header, fields, ref nonBlockingErrors));
                        }

                        // Flush a batch every BATCH_SIZE input lines.
                        if ((counter++ % BATCH_SIZE) == 0)
                        {
                            try
                            {
                                adapter.Update(dt);
                            }
                            catch (Exception ex)
                            {
                                MessageBox.Show(this, ex.Message);
                            }
                            textBox1.Text = (counter - 1).ToString("###,###");
                        }

                        // Keep the UI responsive during the load.
                        Application.DoEvents();
                    }

                    // Push whatever remains of the final partial batch.
                    adapter.Update(dt);
                    textBox1.Text = counter.ToString("###,###");
                }
            }

            sw.Stop();
            MessageBox.Show("Done!\n" + sw.Elapsed + "\nErrors: "
                + nonBlockingErrors.ToString("###,###"));
        }

        /// <summary>
        /// Converts one CSV record into a DataRow: "?" maps to NULL, DateTime
        /// and double columns are coerced, and any conversion failure becomes
        /// NULL and increments <paramref name="nonBlockingErrors"/> instead of
        /// aborting the load.
        /// </summary>
        private static DataRow BuildRow(DataTable dt, string[] header,
            string[] fields, ref int nonBlockingErrors)
        {
            DataRow dr = dt.NewRow();
            for (int i = 0; i < header.Length; i++)
            {
                string columnName = header[i];
                string value = fields[i].Trim('"');
                Type columnType = dt.Columns[columnName].DataType;

                if (fields[i] == "?")
                {
                    dr[columnName] = DBNull.Value;
                }
                else if (columnType == typeof(DateTime))
                {
                    try
                    {
                        dr[columnName] = Convert.ToDateTime(value);
                    }
                    catch (Exception)
                    {
                        dr[columnName] = DBNull.Value;
                        nonBlockingErrors++;
                    }
                }
                else if (columnType == typeof(double))
                {
                    // Parse with the invariant culture so dot-decimal CSV data
                    // loads correctly on any regional settings (the previous
                    // Replace('.', ',') hack only worked on comma-decimal locales).
                    dr[columnName] = decimal.Parse(value, NumberStyles.Any,
                        CultureInfo.InvariantCulture);
                }
                else
                {
                    try
                    {
                        dr[columnName] = value;
                    }
                    catch (Exception)
                    {
                        dr[columnName] = DBNull.Value;
                        nonBlockingErrors++;
                    }
                }
            }
            return dr;
        }

        /// <summary>Lets the user browse for the CSV file to load.</summary>
        private void button2_Click(object sender, EventArgs e)
        {
            using (var dialog = new OpenFileDialog())
            {
                if (dialog.ShowDialog() != DialogResult.Cancel)
                {
                    txtFile.Text = dialog.FileName;
                }
            }
        }

        /// <summary>
        /// Fills the database combo with every database visible in dbc.databases.
        /// </summary>
        private void populateComboes()
        {
            using (var dbConnection = new TdConnection(BuildConnectionString()))
            using (var myCommand = new TdCommand(
                "select databasename from dbc.databases", dbConnection))
            {
                dbConnection.Open();
                using (TdDataReader myReader = myCommand.ExecuteReader())
                {
                    while (myReader.Read())
                    {
                        cboDatabaseList.Items.Add(myReader[0].ToString().Trim());
                    }
                }
            }
        }

        /// <summary>
        /// Collects host/user/password from the login dialog, then populates
        /// the database combo. Exits the application if login is cancelled.
        /// </summary>
        private void Form1_Load(object sender, EventArgs e)
        {
            using (var fLogin = new frmLogin())
            {
                fLogin.ShowDialog(this);
                if (fLogin.fCancel)
                {
                    Application.Exit();
                }
                else
                {
                    host = fLogin.strHost;
                    uid = fLogin.strUID;
                    pwd = fLogin.strPWD;
                    populateComboes();
                }
            }
            txtSeparator.Text = SEPARATOR.ToString();
        }

        /// <summary>
        /// Refreshes the table list whenever a database is chosen. The query
        /// is parameterized so combo text cannot inject SQL, and the combo is
        /// cleared first so repeated selections don't accumulate duplicates.
        /// </summary>
        private void cboDatabaseList_TextChanged(object sender, EventArgs e)
        {
            cboTables.Items.Clear();

            using (var dbConnection = new TdConnection(BuildConnectionString()))
            using (var myCommand = new TdCommand(
                "select tablename from dbc.TablesVX where databasename = ?",
                dbConnection))
            {
                dbConnection.Open();
                myCommand.Parameters.Add("databasename", TdType.VarChar).Value =
                    cboDatabaseList.Text;
                using (TdDataReader myReader = myCommand.ExecuteReader())
                {
                    while (myReader.Read())
                    {
                        cboTables.Items.Add(myReader[0].ToString().Trim());
                    }
                }
            }
        }

        /// <summary>Exits the application.</summary>
        private void cmdKo_Click(object sender, EventArgs e)
        {
            Application.Exit();
        }

        /// <summary>Rebuilds the preview grid columns from the CSV header fields.</summary>
        private void addColumns(string[] headers)
        {
            dgvPreview.Columns.Clear();
            foreach (string header in headers)
            {
                dgvPreview.Columns.Add(header, header);
            }
        }

        /// <summary>Loads the whole CSV file into the preview grid.</summary>
        private void button3_Click(object sender, EventArgs e)
        {
            dgvPreview.Rows.Clear();
            using (var file = new FileStream(txtFile.Text, FileMode.Open,
                FileAccess.Read, FileShare.Read, 4096))
            using (var reader = new StreamReader(file))
            {
                string headerLine = reader.ReadLine();
                if (headerLine == null)
                {
                    return; // empty file: nothing to preview
                }
                addColumns(headerLine.Replace("\"", "").Split(SEPARATOR));

                while (!reader.EndOfStream)
                {
                    var fields = reader.ReadLine().Replace("\"", "").Split(SEPARATOR);
                    dgvPreview.Rows.Add(fields);
                }
            }
        }
    }
}
- I’m not claiming that this is an alternative to Teradata’s fast-loading tools, especially for big data files.
- In no way do I mean that Teradata SQL Assistant won’t work, but just that LoadData has proven to be a bit faster (take a look at the screenshot below for an idea: 1.224.916 records in about 6 minutes).
- In my experience it’s worth using LoadData for files of 1 million records max.
- We’re talking about an alpha – though fully functioning – release. There are a lot of tests the code should undergo, and suggestions, bug reports and enhancements are welcome!