Data Manipulation
Data wrangling
Print some words that start with a particular string (e.g. words starting with 'phy')
# If file is not specified, the file /usr/share/dict/words is used.
look phy|head -n 10
# NOTE(review): the sample output below lists words starting with "Phi", not "phy" —
# presumably it was captured with a different prefix; confirm and regenerate.
# Phil
# Philadelphia
# Philadelphia's
# Philby
# Philby's
# Philip
# Philippe
# Philippe's
# Philippians
# Philippine
Repeat printing string n times (e.g. print 'hello world' five times)
printf 'hello world\n%.0s' {1..5}
Do not echo the trailing newline
# -n tells echo to omit its trailing newline.
username=$(echo -n "bashoneliner")
Copy a file to multiple files (e.g. copy fileA to fileB, fileC and fileD)
tee <fileA fileB fileC fileD >/dev/null
Delete all non-printing characters
tr -dc '[:print:]' < filename
Remove newlines (line breaks)
tr --delete '\n' <input.txt >output.txt
Replace newline
tr '\n' ' ' <filename
To uppercase/lowercase
# Reads stdin and writes it back in uppercase. tr takes plain character sets, not
# sed-style /.../ delimiters; swap the two sets (tr 'A-Z' 'a-z') for lowercase.
tr 'a-z' 'A-Z'
Translate a range of characters (e.g. substitute a-z into a)
echo 'something' |tr a-z a
# aaaaaaaaa
Compare two files (e.g. fileA, fileB)
diff fileA fileB
# a: added; d:delete; c:changed
# or
sdiff fileA fileB
# side-to-side merge of file differences
Compare two files, ignoring trailing carriage returns (e.g. fileA, fileB)
diff fileA fileB --strip-trailing-cr
Number a file (e.g. fileA)
# Number the non-empty lines of fileA (default: right-aligned 6-wide number, then a tab).
nl fileA
# or: right-justified numbers padded with leading zeros
nl -nrz fileA
# or: keep it simple — minimal number width and a single blank as separator
nl -w1 -s ' ' fileA
Join two files field by field with tab (by default join uses the first column of both files, and the default separator is a blank)
# fileA and fileB must both be sorted on the join field.
# '\t' in plain quotes is the two characters backslash + t, which join rejects as a
# multi-character separator; $'\t' passes a real tab character.
join -t $'\t' fileA fileB
# Join using specified field (e.g. column 3 of fileA and column 5 of fileB)
join -1 3 -2 5 fileA fileB
Combine/ paste two or more files into columns (e.g. fileA, fileB, fileC)
paste fileA fileB fileC
# default tab separate
Group/combine rows into one row
# e.g.
# AAAA
# BBBB
# CCCC
# DDDD
# Each "-" takes one line of stdin per output row; the columns are joined with tabs.
paste - - <filename
# AAAA<TAB>BBBB
# CCCC<TAB>DDDD
paste - - - - <filename
# AAAA<TAB>BBBB<TAB>CCCC<TAB>DDDD
Fastq to fasta (fastq and fasta are common file formats for bioinformatics sequence data)
# Put each 4-line record on one tab-separated row, turn the leading "@" into ">",
# keep only the first two columns, then split them back onto separate lines.
paste - - - - <file.fastq \
  | sed 's/^@/>/g' \
  | cut -f1-2 \
  | tr '\t' '\n' >file.fa
Reverse string
echo 12345| rev
Generate sequence 1-10
seq 10
Find average of input list/file of integers
# Average = (sum of all lines) / (number of lines), evaluated by bc to 2 decimal places.
# Taking the count from `wc -l <file` avoids cutting up wc's "count filename" output,
# which breaks when the count is padded with leading spaces.
i=$(wc -l <filename)
# paste -sd+ joins every line with "+", producing the sum expression for bc.
echo "scale=2; ($(paste -sd+ filename)) / $i" | bc
Generate all combinations (e.g. of 1 and 2)
echo {1,2}{1,2}
# 11 12 21 22
Generate all combinations of a given length (e.g. of A, T, C and G)
# Build a brace pattern repeated $group times ({A,T,C,G}{A,T,C,G}...), then let a
# child bash expand it into every sequence of that length.
# Assignments take no spaces around "=", and the pattern is quoted so it is stored
# literally instead of being brace-expanded on the spot.
set='{A,T,C,G}'
group=5
repetition=
for ((i = 0; i < group; i++)); do
  repetition=$set$repetition
done
# The current shell does not brace-expand the result of a variable expansion, so the
# pattern is handed to a new shell as command text.
bash -c "echo $repetition"
Read file content to variable
foo=$(<test1)
Echo size of variable
echo ${#foo}
Echo a tab
echo -e ' \t '
Split a file into smaller files
# Split by line (e.g. 1000 lines/smallfile)
split -d -l 1000 largefile.txt
# Split by byte without breaking lines across files
split -C 10 largefile.txt
Create a large amount of dummy files (e.g 100000 files, 10 bytes each):
#1. Create a big file: 100000 blocks of 10 bytes = 1000000 bytes
#   (a 10-byte block size needs a tenth of the read/write calls that bs=1 would)
dd if=/dev/zero of=bigfile bs=10 count=100000
#2. Split the big file to 100000 10-bytes files
#   (-a 10 uses 10-character suffixes; the default suffix length is too short for 100000 names)
split -b 10 -a 10 bigfile
Rename all files (e.g. remove ABC from all .gz files)
rename 's/ABC//' *.gz
Remove file extension (e.g remove .gz from filename.gz)
basename filename.gz .gz
zcat filename.gz> $(basename filename.gz .gz).unpacked
Add a file extension to all files (e.g. add .txt)
# The expression is quoted so the shell hands "$" and "/" to rename untouched.
rename 's/$/.txt/' *
# You can use rename -n 's/$/.txt/' * to check the result first, it will only print sth like this:
# rename(a, a.txt)
# rename(b, b.txt)
# rename(c, c.txt)
Squeeze repeat patterns (e.g. /t/t --> /t)
# Collapse each run of consecutive tabs into a single tab. tr spells a tab as '\t';
# the set "/t" would instead squeeze runs of "/" and runs of the letter "t".
tr -s '\t' < filename
Do not print a trailing newline with echo
echo -e 'text here \c'
View first 50 characters of file
head -c 50 file
Cut and get last column of a file
# Reverse every line so the last "/"-separated field comes first, take it, and
# reverse it back to its original spelling.
rev file \
  | cut -d/ -f1 \
  | rev
Add one to variable/increment/ i++ a numeric variable (e.g. $var)
((var++))
# or
var=$((var+1))
Cut the last column
# Same reverse / cut / reverse trick with cut's default tab delimiter:
# prints the last tab-separated field of each line.
rev filename \
  | cut -f1 \
  | rev
Cat to a file
cat >myfile
let me add sth here
exit by control + c
^C
Clear the contents of a file (e.g. filename)
>filename
Append to file (e.g. hihi)
echo 'hihi' >>filename
Working with json data
# Install the jq package first:
#sudo apt-get install jq
# e.g. to get the value of the 'url' key, give the json file to the following jq command
# (you can use .[]. to select inner json, i.e jq '.[].url')
jq '.url' file.json
Decimal to Binary (e.g get binary of 5)
# Brace expansion lists all 256 eight-digit strings of 0/1 in counting order, so
# element N of the array is the zero-padded binary form of N (for N in 0..255).
D2B=({0,1}{0,1}{0,1}{0,1}{0,1}{0,1}{0,1}{0,1})
echo "${D2B[5]}"
#00000101
echo "${D2B[255]}"
#11111111
Wrap each input line to fit in specified width (e.g 4 integers per line)
echo "00110010101110001101" | fold -w4
# 0011
# 0010
# 1011
# 1000
# 1101
Sort a file by column and keep the original order of lines with equal keys (stable sort)
sort -k3,3 -s
Right align a column (right align the 2nd column)
cat file.txt|rev|column -t|rev
To both view and store the output
echo 'hihihihi' | tee outputfile.txt
# use '-a' with tee to append to file.
Show non-printing (Ctrl) characters with cat
cat -v filename
Convert tab to space
expand filename
Convert space to tab
unexpand filename
Display file in octal ( you can also use od to display hexadecimal, decimal, etc)
od filename
Reverse cat a file
tac filename
Reverse the result from uniq -c
# Each line of test.txt is "<count> <value>" as printed by uniq -c; print <value>
# <count> times. -r keeps backslashes literal, and the quotes stop the value from
# being word-split or expanded as a glob.
while read -r a b; do
  yes "$b" | head -n "$a"
done <test.txt
Last updated
Was this helpful?