Write files periodically with normal mixture samples for structured streaming
This notebook can be used to write files every few seconds into the distributed file system, where each of these files contains a time stamp field followed by a sample randomly drawn from a mixture of two normal distributions.
After running the commands in this notebook you should have a set of files named by the minute and second, for easy setting up of structured streaming jobs in another notebook.
Mixture of 2 Normals
Here we will write some Gaussian mixture samples to files.
import scala.util.Random
import scala.util.Random._
/** Draws one timestamped sample from a mixture of two normal random variables.
  *
  * Both components have standard deviation 1 and differ only in their location (mean).
  * With probability `normalWeight` the sample is drawn around `normalLocation`; otherwise it is
  * drawn around `abnormalLocation`. Each call sleeps for 5 milliseconds before the timestamp is
  * taken, so successive calls are spaced at least 5 ms apart.
  *
  * @param normalLocation   mean of the "normal" component
  * @param abnormalLocation mean of the "abnormal" component
  * @param normalWeight     probability of drawing from the "normal" component
  * @param r                random number generator; seed it to get reproducible sample values
  * @return a pair of (current time formatted as "yyyy-MM-dd HH:mm:ss.SSS", sampled value)
  */
def myMixtureOf2Normals(normalLocation: Double, abnormalLocation: Double, normalWeight: Double, r: Random): (String, Double) = {
  // Draw order matters for reproducibility with a seeded generator:
  // first the uniform that selects the component, then the Gaussian noise.
  val location = if (r.nextDouble() <= normalWeight) normalLocation else abnormalLocation
  val sample = r.nextGaussian() + location
  Thread.sleep(5L) // sleep 5 milliseconds
  val now = new java.text.SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS").format(new java.util.Date())
  (now, sample)
}
import scala.util.Random
import scala.util.Random._
myMixtureOf2Normals: (normalLocation: Double, abnormalLocation: Double, normalWeight: Double, r: scala.util.Random)(String, Double)
// Fixed seed: the two sampled values printed below are reproducible, the time stamps are not.
val r = new Random(1L)
println(
  (
    myMixtureOf2Normals(1.0, 10.0, 0.99, r),
    myMixtureOf2Normals(1.0, 10.0, 0.99, r)
  )
)
// with seed = 1L the sampled values are always 0.5876430182311466 and -0.34037937678788865
((2020-11-16 10:47:55.774,0.5876430182311466),(2020-11-16 10:47:55.780,-0.34037937678788865))
r: scala.util.Random = scala.util.Random@cb2fa8b
// Histogram of 1000 samples: draw them on the driver (each draw sleeps 5 ms, so this takes
// several seconds), turn them into a DataFrame and plot only the value column "_2".
display(
  sc.parallelize(Vector.fill(1000)(myMixtureOf2Normals(1.0, 10.0, 0.99, r)))
    .toDF
    .select("_2")
)
_2 |
---|
1.63847575097573 |
0.8497955378433464 |
1.0173381805959432 |
-2.6960935205721848e-2 |
-6.096818465288045e-2 |
0.6235321652739503 |
1.1594225593708558 |
2.6812781205628102 |
2.3144624015522943 |
3.2746230371718874 |
0.6239556140200029 |
0.6428284914761508 |
-0.42618967795971496 |
0.4090774320731605 |
0.731226227370048 |
1.392728206581036 |
1.3354355936933495 |
0.17821385872329187 |
-0.23317608061362294 |
0.47289802886431465 |
-1.9401934414671596 |
10.214120281108658 |
1.892684662207417 |
1.0166947170672929 |
2.2709372842290798e-2 |
2.1067186310892803 |
-0.2704224394550252 |
1.1899806078296409 |
1.9798405611441416 |
1.674277523545705 |
1.7051472629603235 |
0.30230911121326054 |
1.3131677682296694 |
0.6862907308874912 |
1.5472597655237206 |
-0.21817241232347429 |
2.7926715721364213 |
1.076185529891312 |
1.553330079653454 |
1.8513287666787783 |
0.9573896481551991 |
5.970253671115444e-2 |
1.1541469640881448 |
0.2802123113351551 |
-0.5344837021313593 |
1.725283845177347 |
1.755161663154593 |
-0.5917988890618884 |
-0.16673131970385602 |
-0.8875029732034849 |
0.8698903603546103 |
2.114856053952625 |
2.497996116473529 |
9.58947465374601 |
2.9441080471551846 |
2.5155704683949165 |
0.17630763054020993 |
0.18281600980713353 |
1.9274188319901921 |
-0.5948223586642365 |
-1.2010559428888148 |
0.39178009640886724 |
1.8700313397445891 |
1.9822181107950154 |
1.2988415493692078 |
0.19172302714886202 |
1.8821092899972611 |
1.3022299642012503 |
1.4752832590499911 |
0.3609863067031439 |
1.9323022237840024 |
1.69205485183195 |
2.837382612764204 |
0.1910603148124741 |
0.341728700100859 |
2.2890102177414597 |
0.9364985206830795 |
-9.834860359562425e-2 |
1.9684980059019335 |
2.1078374167392218 |
0.8418311806515589 |
-6.692278803103968e-2 |
3.0610863387705947 |
2.5970193768494565 |
1.7322441665190011 |
1.906475635230355 |
0.7904177135359902 |
2.251629313442862 |
0.821157571251303 |
0.604717877958487 |
-2.786225541637921e-2 |
0.25604168086650014 |
2.227036911451595 |
0.5553767689608333 |
1.2188030914528114 |
0.4514774439330318 |
0.6543905674437465 |
0.40245654155097643 |
0.5060580393139191 |
0.39607300301218784 |
1.2148439132942364 |
-1.1732951662319397 |
2.73880972901814 |
1.8021623567418805 |
1.7158266876051977 |
-5.753772099978405e-2 |
0.8372348006260973 |
1.0550026092093487 |
-0.195219134074764 |
0.8383022815662486 |
1.2379684980482244 |
-0.9744247939864081 |
0.34334678488869397 |
0.9559281181537529 |
0.1404041195836977 |
-0.3013816873777868 |
2.3896906481802835 |
0.32642106560403683 |
1.1731296744694082 |
1.5569305149964348 |
1.5889387851920198 |
0.4731172095886468 |
1.8856173342031206 |
2.3639372705821358 |
-4.477980164340978e-2 |
1.0290704620759326 |
1.9189177950037783 |
-1.0270431347512887 |
0.29509917268188934 |
0.2778311015556939 |
2.0003447521256588 |
1.1150669950847454 |
0.8896045579010659 |
0.6672200055719166 |
2.835109610406893 |
0.7007330220501728 |
0.9750338145448016 |
1.5645635468047536 |
0.9674358170359617 |
0.25227970595044513 |
1.375957412863052 |
1.308418670787698 |
1.9419970123581474 |
0.8365053262598314 |
1.9114557211166892 |
3.8001770349623225 |
0.8274099015781488 |
0.8067385461002958 |
-1.4655370928162954 |
1.8238007602460073 |
0.5594066359278937 |
1.6110515128363136 |
0.6652500763228728 |
1.0793393655154422 |
2.245390820036886 |
0.35470134173051693 |
1.2952852735590668 |
1.8610829534004827 |
0.33089671577831126 |
1.1590236911322995 |
2.2514554017998076 |
1.3216086127545497 |
0.5881415761335955 |
3.0225970797989596 |
1.9673222329020743 |
1.9658660340337613 |
1.8710008504436737 |
1.845446759447753 |
0.14370718520286974 |
1.908535196201739 |
1.0842042894798714 |
2.3715209552442835 |
1.5054700276157238 |
3.2507198636175434 |
1.2191172203825102 |
0.8790791380073175 |
-1.0037247733674635 |
0.9049623487323446 |
1.2089925367368337 |
0.6882976824476612 |
2.5434935418288713 |
1.0027620856353483 |
-0.27959350084891055 |
0.3279655527731484 |
1.5786118902182438 |
-0.39629416278043617 |
1.1770224496224355 |
2.383953754814976 |
1.091832388214859 |
1.6301831536311209 |
0.935425666376106 |
2.1401208410667696 |
1.5922472337477118 |
0.34768463336762756 |
3.4257932035179848 |
0.7149821525322468 |
1.5415488633926249 |
1.6496390565891736 |
0.9593424220951506 |
0.6518631532504793 |
3.403687949320813 |
0.8320258402390563 |
0.12142126647709839 |
3.597300820980316 |
0.14741363170749655 |
0.8462912401166103 |
1.0584643703479584 |
1.1487138176792762 |
0.3488425183144027 |
1.391612358634887 |
1.505017789786627 |
0.5244087078660062 |
0.2705096241707795 |
2.216688343494393 |
-1.6589238926022496 |
1.708690947086617 |
2.961208276039951 |
6.522128902971014e-2 |
1.930831478373714 |
1.1607184811130336 |
1.6688339248285566 |
1.6470200278340348 |
2.449696037385742 |
0.8251476817941197 |
1.40403402944069 |
1.2942215447658563 |
1.7120981908967403 |
-8.929122874418227e-2 |
3.7731683503114843 |
-6.239748698038494e-2 |
1.1840870589278099 |
1.441608712447355 |
1.208878651490341 |
1.8920599151149384 |
2.527569684835118e-2 |
0.38124154100408325 |
-0.2393929330098632 |
0.1338106966810938 |
0.2676880074695901 |
2.1281668027663505 |
0.3098286754011822 |
0.4585965277423747 |
-2.5930812376542933e-2 |
2.083732337529618 |
0.6731348015628896 |
0.1312853060580106 |
1.6205120279909435 |
3.665204102481068e-2 |
0.38840609319995334 |
0.16412699262051766 |
1.0044846177991755 |
0.45217422200704216 |
-0.18519175139069355 |
-0.29646999575663124 |
0.47945009163424923 |
1.7615776520354332 |
0.35352511094407846 |
0.5902336217395934 |
1.7471513985001184 |
0.4125022414098265 |
0.7484837707176137 |
2.4227348582478037 |
1.0572595828548697 |
3.0750919548401128 |
1.0066299184508531 |
1.1704717372072126e-2 |
0.5249712753060873 |
-0.3285040378625381 |
1.342275367397042 |
0.4464461744940117 |
1.8011252317891455 |
0.814252403591333 |
1.394324297666107 |
1.746173303844829 |
0.9316948966812066 |
0.200871837759239 |
0.21504170344279683 |
1.3508727155177145 |
1.5029472339851955 |
1.3410205427121795 |
-0.29787456681066304 |
0.5368515430517165 |
0.4903528739420413 |
1.385472033421005 |
-0.2770976989604108 |
0.14383609785529106 |
2.136175000080348 |
1.3623047924484522 |
1.7982361871358026 |
0.31744493918444716 |
1.6350814336974835 |
2.1281399761210897 |
2.502566064523463 |
-0.24563123892361882 |
1.1658851718667056 |
0.9774056737281539 |
-0.9759524597979035 |
1.189619858484149 |
1.5319416027989528 |
2.1398379560783063 |
-0.8795632577020192 |
0.9740025013919826 |
2.1670377423670066 |
1.8772156837816794 |
1.4824573418919358 |
2.0936166431428 |
0.48330193647070885 |
1.1693862084127395 |
2.9630411557020633 |
0.7910395376475294 |
2.2572758761916054 |
0.5908398656455651 |
1.0801869359536456 |
1.6034174887237207 |
2.06427635708377 |
1.7447902837676048 |
2.031505217347445 |
0.7818721634007395 |
0.7546588629356998 |
0.21941395803984998 |
1.269969127498187 |
0.6995395205381358 |
1.6248745281288801 |
0.8263012944805543 |
1.4386440510670049 |
2.190301845034811 |
0.21078498872972096 |
0.9513469983933799 |
1.7125428803482596 |
1.398303536566959 |
1.1206534766009186 |
2.290467145367123 |
4.7109383568383345e-3 |
2.294594854357012 |
1.4766168147668075 |
1.1557121731747513 |
1.9080418901330192 |
1.8649147337827425 |
1.4389071560055176 |
0.10082531163297859 |
0.9702568121268303 |
2.7799453357863033 |
2.24549312231816 |
0.5235703215957042 |
4.74734605167747e-2 |
6.947954022632907e-2 |
0.3108356478112668 |
0.8394845735542491 |
1.6339257039230628 |
-7.81874305278627e-2 |
10.700496016278244 |
1.306079988619333 |
1.0105357675060997 |
0.994188218965991 |
1.8719149336462086 |
-1.802104678175969 |
1.3547952390058997 |
1.6863315414298679 |
0.4044486829589423 |
0.663151463812041 |
1.1770322429105757 |
0.352603150929493 |
-0.3331634174041469 |
0.3219528023599839 |
-4.993059725088589e-2 |
2.1359810727430015 |
6.7435821150996e-2 |
0.8152880685205637 |
1.3355438274268063 |
0.2584888411640408 |
2.1807115216184654 |
1.6447860615623693 |
1.3607208126185033 |
-0.7821045197212251 |
0.7038349029306756 |
2.1151467870965206 |
6.485239215670013e-4 |
0.7343141400907413 |
1.481231890529832 |
0.7436383571380679 |
0.3623319184166701 |
1.631447713261156 |
1.9006969436568095 |
1.4157080835736964 |
1.1280983721670481 |
0.9426963726981692 |
0.24856063619656132 |
2.255628154607799 |
2.252115121208546 |
0.7841223760628484 |
2.3421293755672297 |
2.8348182486914295 |
1.7318785148989992 |
0.9762383915277096 |
0.2616760398200866 |
-2.0755223554176574 |
0.8670675571509455 |
2.396557755329019 |
1.1056101876800475 |
0.2021889628502218 |
1.1500629266145144 |
-5.934104215058822e-2 |
1.3810015349944087 |
1.4678912177406023 |
2.673868252550129 |
1.124862297586977 |
1.1197692140235738 |
0.5797093329389853 |
0.6109162599915521 |
1.494308943924204 |
1.1433812164404558 |
-3.285316146399686e-2 |
0.6761399530014083 |
1.1616013112893129 |
0.36992246959386677 |
-0.22957870942980962 |
1.8420697014408531 |
1.668302982091929 |
-0.9984398554534477 |
1.273607856169444 |
1.050632908723346 |
3.256210339328564 |
1.0280169417144063 |
0.25615873705323133 |
1.7462784369850581 |
1.062093713031747 |
1.2932300520227344 |
-0.7938447295027886 |
-0.7609148389003666 |
1.4574415363357067 |
2.135798938708896 |
2.348280132734578 |
0.8734588034850551 |
9.973604957378543 |
0.35196987232964316 |
0.9035385778892631 |
2.831102216825725 |
1.0048753977495803 |
0.10451954987326095 |
1.1013729655881508 |
1.7585193090606088 |
1.226418453772686 |
1.4531562937415308 |
0.7619165788941504 |
0.5461009604744256 |
0.6008649427727283 |
1.3780057162734836 |
1.4622978840755851 |
0.34759263562314335 |
0.5736485977723396 |
2.3452708258883352 |
0.9744585521015179 |
1.285268161746166 |
3.3071183043196593 |
2.5080380549754593 |
1.4259274788963952 |
-2.5903667885951664e-2 |
10.986550512370714 |
-7.581817475272645e-2 |
0.6416173679276148 |
0.9534229122939114 |
2.132841239825161 |
0.6563712251065035 |
0.5765226219933468 |
1.5821498064982138 |
0.8155216333285705 |
2.405818410234607 |
2.1354048597660134 |
0.1630525718563236 |
0.8804014022168887 |
1.7677390861016287 |
-8.660285888231711e-2 |
1.4957055199119156 |
1.1686901432590528 |
1.230599828613972 |
0.8291351676730639 |
0.16369805405490445 |
1.7141910766526713 |
-6.948651912105075e-2 |
1.1025296196845764 |
2.085963027757715 |
2.3289725923192126 |
9.123696930094438e-2 |
0.7863635245315979 |
1.2210644347049109 |
2.536308464119898 |
0.7390652688383932 |
1.7955412008455056 |
-0.4063994398735684 |
0.417663857272753 |
0.7572268352036777 |
2.2956618964137903 |
0.8643802079825195 |
0.6085757743161285 |
0.8861852158953121 |
0.20486279885097103 |
1.6421903961418307 |
-1.4786102936414443 |
1.61151014329611 |
1.458781222964441 |
0.9231917785415031 |
2.987811838778618 |
-0.11728923230585608 |
-0.5667637111213064 |
0.6043249745554822 |
0.5250792304891077 |
1.7590856628461462 |
2.9472816508826574 |
-0.5091218022791495 |
0.2743637223565212 |
0.7727501145356438 |
1.402853616121213 |
1.7554959619234003 |
0.7230708651518659 |
-0.6084691576920431 |
0.17482707245078832 |
2.067354421609911 |
0.7901599165913906 |
1.652492308966036e-2 |
2.2069354262628993 |
0.5663855120607604 |
1.3454449336721601 |
2.2789771861802697 |
-0.22026555210551368 |
2.0631465437978855 |
2.039163346272715 |
-1.055015394694371 |
1.7388420238468236 |
1.872637961652257 |
3.2010804478144554 |
1.3562671867479166 |
-0.32950571604262713 |
-0.12503275455021545 |
1.277154825764409 |
0.8843613626221687 |
1.0729763052881904 |
2.3265082147700946 |
1.396457159161225 |
2.0285111744592372 |
1.330964559152572 |
2.2179887039102653 |
0.6284429614059523 |
0.3504227161829304 |
1.059416715157096 |
2.138857533311191 |
0.8094290918556053 |
0.46138825363082825 |
1.7155270828145763 |
1.2795402627927337 |
1.923106069471253 |
1.5104912344606418 |
0.5207525194340703 |
0.9946842162070768 |
3.5622068453996514e-2 |
0.18537526816937433 |
0.6111284963963912 |
0.8203863356928468 |
2.6753862253484195 |
1.650835985592774 |
1.5665183929170006 |
1.8194416800691129 |
2.0526203700510672 |
-0.651338434409015 |
1.7895684845926731 |
2.418781611162374 |
1.0787214433473757 |
0.18911021430460961 |
-0.2730870440651234 |
1.1132512450740597 |
1.0791906170600118 |
0.655348601135614 |
1.8662289569576362 |
1.627670496605692 |
11.004554142758568 |
0.1965392063488054 |
1.544545432892698 |
1.5832867870520477 |
1.8981385396481731 |
2.8183382851231302 |
0.5740375808180014 |
-0.22669316378819526 |
-0.24362648940070186 |
0.6735629913863805 |
2.602135296264379 |
8.790069946983575 |
2.5322332152585725 |
3.3409503348064518 |
0.687556087265665 |
3.0425649254546347 |
2.303391429846904 |
1.4423838621149456 |
1.4532350387273563 |
1.4533824084502533 |
3.587876263951717e-2 |
2.20315841028769 |
0.36244134638228875 |
0.6105194199863686 |
-0.45389649712820246 |
1.0252296817007915 |
3.1243573699805918 |
-0.3343748567492124 |
0.5029922338992889 |
1.7237601942074183 |
1.3511410651578275 |
1.0909840953480212 |
-0.32919830466100297 |
2.141237972293073 |
0.5392802259404192 |
2.0625972069627694 |
1.244091480416753 |
0.5541089423362922 |
0.6229771138177793 |
1.6750943089353725 |
0.6582509099634593 |
1.3833368320356623 |
1.5435711785532977 |
0.522680525295731 |
1.1915344402925043 |
0.35720513834862744 |
-8.565981412975487e-3 |
1.5372133772727767 |
5.533905500333114e-2 |
2.106400343304179 |
0.9291634254575988 |
0.85445925115937 |
1.4490678502266374 |
2.1619334568272004 |
0.2576823307255467 |
3.0652975358670402 |
1.7957493894493899 |
1.3815187097537076 |
0.7289953121054422 |
0.6648198928488573 |
2.796805936556927 |
-0.6218526047970672 |
3.443106828552411 |
0.5417960512110482 |
0.132679330973015 |
1.682704565079506 |
-0.9024146545484664 |
1.7184762053500053 |
-1.0864413777491877 |
2.4082299567533854 |
0.6867344491764446 |
0.6342469814283839 |
1.4948735109252287 |
-9.172542759290137e-3 |
1.6946662014403582 |
0.5055706186920743 |
0.8321247164414739 |
-1.9229724798355035 |
2.0668198169520764 |
0.6061498747704183 |
1.1542480718044577 |
0.8061008067230262 |
3.082304566988994 |
0.860398339444179 |
0.8735622227950341 |
1.645389530143016 |
0.31526948080501604 |
1.752922329065294 |
1.569150260594506 |
1.7989690892449675 |
1.8128536428855522 |
2.174392027870953 |
2.0629021196034616 |
2.846679590420721 |
1.6394381805033087 |
0.5575890384195411 |
-6.1635562922423226e-2 |
0.8770394819777554 |
0.18278872308418326 |
0.6653187571193362 |
0.3097532566309745 |
1.4422363582612099 |
2.2343382287346265 |
1.324757244250018 |
-0.6265562099516999 |
0.7530623041134055 |
2.4548974876953755 |
1.1505257093180181 |
0.18314366349299938 |
1.0957673061461137 |
1.5898966599318236 |
-0.8021527506395465 |
2.9919524601934695 |
0.16404921511675252 |
-1.2374220758846475 |
1.0926015540947187 |
1.7047203204003867 |
2.0361007784966425 |
-1.1129955618534204 |
1.2078806959684631 |
1.3823025668814726 |
-0.20117996650194825 |
0.4368510042955428 |
1.548393626209222 |
0.4462763885045765 |
0.9980172029792674 |
0.2244700800716185 |
-0.21800349448603162 |
0.16417229737752437 |
0.6207189588427342 |
-0.6888898628873894 |
0.5459693517409819 |
0.9025080719803394 |
-0.80260527881833 |
1.6163270811863857 |
2.633075725131599 |
-0.7773305013995544 |
0.2526721145132672 |
-0.1366344706945435 |
0.6144275159404241 |
1.97335159695082 |
0.653596005828342 |
1.6116893597735258 |
1.2014572106480943 |
1.5512973474330294 |
1.1163799230226856 |
0.7681458989542176 |
-1.3590749505030857 |
1.808641901084573 |
1.360253821157752 |
1.650932132662064 |
2.7784418591342397e-2 |
6.352095139713354e-4 |
1.441774342046809 |
1.2027746328482523 |
2.3897838732271515 |
-0.716495277439726 |
1.4815526561707035 |
0.6060868601377281 |
1.0576521934236953 |
0.48936224390584415 |
1.9354637823092111 |
0.9117750291708597 |
1.5859428172890642 |
0.3265619461022775 |
1.4075266094882544 |
0.1875530632757464 |
3.785747077498869e-2 |
0.1631893341844134 |
1.3291278928477839 |
2.052089759378511 |
1.669824081897619 |
2.1070512246301867 |
0.6995955417782742 |
0.8007672174475671 |
0.7544785261517125 |
0.35717027130006673 |
0.1475616766180351 |
1.058691342729173 |
2.066376270854084 |
0.7760634597252762 |
0.8763163198863502 |
0.7454179518243453 |
0.4854355686115821 |
1.5918037585502445 |
1.0851213695662045 |
2.0203357822531967 |
1.7808274985539043 |
3.3342822479741083 |
0.5663200540954269 |
1.2312898243010122 |
2.441121828813979 |
1.7131802924620656 |
1.8828830522974154 |
1.219662609175296 |
0.6260480088733502 |
0.6699112699464786 |
2.9616463814694187 |
0.4897205675463613 |
1.3817814430979267 |
2.368499575307164 |
0.7666593765571403 |
-0.2356306903167824 |
0.11121972846306671 |
0.13048302696943748 |
2.0296555679842934 |
0.6821008335751151 |
0.824902101020345 |
-1.4357280652706677 |
0.43551196308521123 |
0.42864594983955717 |
1.5708973296624738 |
0.7355576398800922 |
1.7698063948887048 |
1.5506318814642417 |
7.337999234267556e-2 |
-0.5590435862206975 |
2.0708510087246363 |
1.0937426555598604 |
1.866409592951821 |
1.4315729492342215 |
0.4352270706743224 |
-0.32282708992213793 |
-0.7926070985572229 |
4.1313375262139385 |
2.5182310368999583 |
1.5517727635341054 |
1.2112227339558879 |
1.088870081894894 |
1.3840364826625247 |
0.7313598601928168 |
1.4979622853866332 |
-9.589043964797783e-2 |
0.18259590774466605 |
-0.605015706579229 |
0.479252514082421 |
2.446158573827078 |
0.9738579307442236 |
0.8806136322907132 |
-1.208394918011912 |
-0.14037642146707818 |
-8.062071408017712e-2 |
0.325790154240306 |
0.2876391391859443 |
-5.4041048579440476e-2 |
0.8773670649720676 |
2.367842331811702 |
0.26889262808832914 |
1.724119815882517 |
1.7791641061471468 |
0.734736381050196 |
1.6845526251491851 |
1.621893865768243 |
2.0312929797085744 |
0.7494718435492904 |
1.2022212419280855 |
1.8619921045777033 |
0.7029888222716667 |
1.994083595934665 |
0.7072489103032473 |
-1.1377100954267716 |
2.043095143556785 |
2.166457075549966 |
0.5807453858478133 |
1.4374121122629657 |
0.954614007781135 |
1.953034562155581 |
-0.31599836594918784 |
0.4068636792746446 |
1.7729848751693704 |
0.13604733276520964 |
1.5733429242522448 |
-0.24138972935741654 |
1.9768168747035486 |
0.8346709933535629 |
-1.0458047931412482 |
0.6197520625356908 |
1.624038733446523 |
1.2010762746847274 |
-0.9819289179145085 |
2.4170778788623286 |
1.107936607297393 |
1.3942450780076066 |
1.3573378818806876 |
4.1669854205718004e-2 |
-1.0230668880126963 |
2.1672936942995924 |
2.119034877005035 |
2.617354118940299 |
0.2064667615861927 |
-0.5725286096934115 |
2.1705557671839575 |
1.074568348781358 |
1.5117862398041413 |
1.720961326096484 |
0.5484584732722113 |
-0.5487880960214739 |
0.6889819892464035 |
1.0619551547481028 |
1.211942404937342 |
1.6696784059290148 |
1.3761977731682555 |
3.071527103226564 |
0.8205571465304501 |
0.27290043128415853 |
1.0653401225764465 |
1.1614811977828927 |
1.7383121079945434 |
-0.1599579688189814 |
0.7161844910476673 |
1.5825851430026225 |
1.729082954840492 |
2.9837819396012546 |
0.6578883627962699 |
1.1377610134374765 |
-0.6323774087781868 |
0.46007501303967147 |
2.6521967942692486 |
-6.125168548660631e-2 |
2.9103478438885473 |
0.39594395800598337 |
2.1923535711428848 |
1.4697826622154868 |
0.30222917190196086 |
1.459617110508502 |
1.5211379317237608 |
0.5572602929344768 |
1.6823380276685338 |
1.5547569617336123 |
0.967365035197969 |
0.7566178166538748 |
2.1650489378269775 |
2.4253722899093866 |
10.23186845160902 |
0.1488435276719955 |
1.334317069373473 |
0.13187282300981706 |
1.9930419173328975 |
0.1443423696606898 |
0.6007539040372134 |
1.886587311852184 |
0.28914373993637366 |
1.9776682498960527 |
2.4758782746217123 |
2.7070897621357446 |
0.9472622505602268 |
1.2602098520570666 |
-1.7566407142390892 |
1.5745734897235473 |
0.6130201020930863 |
1.0358032358478906 |
1.5052778526230153 |
-1.0672425439622724 |
1.5281373508721496 |
0.5223419811330235 |
1.8542151442789314 |
1.2285178570916262 |
2.7988645052766676 |
-0.861627034243382 |
0.9294699314786086 |
2.2360036137643347 |
0.28408155798843726 |
0.943759844028374 |
1.4808042306276197 |
8.976182739712016 |
3.5904784799076 |
1.244680688295711 |
2.015594144111281 |
1.6682081811950065 |
1.3062492721042696 |
0.9597597738929083 |
0.14081110195632118 |
0.6802144777826231 |
1.7427436989859513 |
1.025119809243448 |
2.363808744264097 |
1.098939384496553 |
2.365900700802084 |
0.9639713759620857 |
1.1879608636926524 |
0.2648582754089548 |
1.6006017120486895 |
7.519379906201751e-2 |
0.2198949605081273 |
1.5190910869919414 |
1.3945275248631226 |
0.651205917924697 |
0.6910676581885502 |
2.041288605562429 |
1.3322502409533927 |
0.8190849792925787 |
0.7731687411115308 |
2.052287561083507 |
1.8008966716911172 |
1.231841585551883 |
1.6898412909890235 |
1.2644139086012807 |
2.7531121025849865 |
0.30461789013034024 |
0.7788454265121008 |
1.3219507722927721 |
-0.45523491060288057 |
1.2677313887765018 |
1.7996640101307269 |
0.4670890255653002 |
2.4769073718909094 |
0.3494158442146792 |
0.22328815419586 |
0.34206270424196084 |
-0.12746179104408784 |
2.197149144795371 |
-0.6353823443308804 |
1.0875581317673213 |
0.9444545881585366 |
1.965966128848457 |
0.5039931206538372 |
1.2106483228588576 |
0.6580705742678696 |
2.3578096465318747 |
-0.5066449988185571 |
1.0762171812349997 |
1.0356862000259204 |
-0.6471662591053053 |
1.8235922438459014 |
1.0820154631653591 |
2.3564967540496595 |
1.6506535694050806 |
// Delete the output directory (recursively) before starting a job, so earlier runs leave no files behind.
dbutils.fs.rm("/datasets/streamingFilesNormalMixture/", recurse = true)
res2: Boolean = false
val r = new Random(12345L) // set seed for reproducibility
// Write 5 batches to the distributed fs, one every ~5 seconds, each into its own directory.
for (_ <- 1 to 5) {
  // make a DataSet of 100 samples from the mixture; coalesce to a single partition
  // so that each directory ends up with only one part file
  val data = sc.parallelize(Vector.fill(100)(myMixtureOf2Normals(1.0, 10.0, 0.99, r)))
    .coalesce(1)
    .toDF.as[(String,Double)]
  // Name the directory "<minute>_<second>". Both fields are formatted from ONE instant, so the
  // name can never combine the minute of one moment with the second of the next.
  val minuteSecond = new java.text.SimpleDateFormat("mm'_'ss").format(new java.util.Date())
  // write to dbfs
  data.write.mode(SaveMode.Overwrite).csv("/datasets/streamingFilesNormalMixture/" + minuteSecond)
  Thread.sleep(5000L) // sleep 5 seconds
}
r: scala.util.Random = scala.util.Random@5704bfda
a: Int = 0
// List what was written under the output directory.
display(dbutils.fs.ls("/datasets/streamingFilesNormalMixture/"))
path | name | size |
---|---|---|
dbfs:/datasets/streamingFilesNormalMixture/48_11/ | 48_11/ | 0.0 |
dbfs:/datasets/streamingFilesNormalMixture/48_19/ | 48_19/ | 0.0 |
dbfs:/datasets/streamingFilesNormalMixture/48_26/ | 48_26/ | 0.0 |
dbfs:/datasets/streamingFilesNormalMixture/48_36/ | 48_36/ | 0.0 |
dbfs:/datasets/streamingFilesNormalMixture/48_43/ | 48_43/ | 0.0 |
// Look inside one batch directory. NOTE: "48_43" is the minute_second name produced by the
// recorded run; replace it with a directory name from your own listing when re-running.
display(dbutils.fs.ls("/datasets/streamingFilesNormalMixture/48_43/"))
path | name | size |
---|---|---|
dbfs:/datasets/streamingFilesNormalMixture/48_43/_SUCCESS | _SUCCESS | 0.0 |
dbfs:/datasets/streamingFilesNormalMixture/48_43/_committed_5911069874541273534 | _committed_5911069874541273534 | 115.0 |
dbfs:/datasets/streamingFilesNormalMixture/48_43/_started_5911069874541273534 | _started_5911069874541273534 | 0.0 |
dbfs:/datasets/streamingFilesNormalMixture/48_43/part-00000-tid-5911069874541273534-d96c7c40-0395-40b6-a223-79c4cdb475c8-35610-1-c000.csv | part-00000-tid-5911069874541273534-d96c7c40-0395-40b6-a223-79c4cdb475c8-35610-1-c000.csv | 4310.0 |
Take a peek at what was written.
// Read the CSV part file(s) of one batch directory back in, asking Spark to infer column types.
// NOTE: the "48_43" directory name comes from the recorded run and will differ on re-execution.
val df_csv = spark.read.option("inferSchema", "true").csv("/datasets/streamingFilesNormalMixture/48_43/*.csv")
df_csv: org.apache.spark.sql.DataFrame = [_c0: string, _c1: double]
df_csv.count() // expect 100: each batch holds the 100 samples drawn in one iteration of the write loop
res8: Long = 100
df_csv.show(10,false) // print the first 10 rows; false = do not truncate long column values
+-----------------------+--------------------+
|_c0 |_c1 |
+-----------------------+--------------------+
|2020-11-16 10:48:42.690|2.0531657985840983 |
|2020-11-16 10:48:42.696|1.7928797637680196 |
|2020-11-16 10:48:42.701|2.9329556976986013 |
|2020-11-16 10:48:42.706|1.1087520027663345 |
|2020-11-16 10:48:42.711|1.2115868818351045 |
|2020-11-16 10:48:42.716|1.9163661519192294 |
|2020-11-16 10:48:42.722|1.6917128257752045 |
|2020-11-16 10:48:42.727|1.0095879056962782 |
|2020-11-16 10:48:42.732|-0.13611276130309613|
|2020-11-16 10:48:42.737|2.2939319088848023 |
+-----------------------+--------------------+
only showing top 10 rows